diff --git a/docs/config/_default/menus.toml b/docs/config/_default/menus.toml index c5157c29dce2..5cc6638ff004 100644 --- a/docs/config/_default/menus.toml +++ b/docs/config/_default/menus.toml @@ -91,7 +91,7 @@ name = "YugabyteDB Aeon" weight = 4 parent = "products" - url = "/preview/yugabyte-cloud/" + url = "/stable/yugabyte-cloud/" [main.params] description = "Fully-managed cloud DBaaS" excludeVersion = true @@ -101,7 +101,7 @@ name = "YugabyteDB Voyager" weight = 5 parent = "products" - url = "/preview/yugabyte-voyager/" + url = "/stable/yugabyte-voyager/" [main.params] description = "Database migration service" excludeVersion = true @@ -110,14 +110,14 @@ [[main]] name = "Tutorials" weight = 2 - url = "/preview/tutorials/" + url = "/stable/tutorials/" [main.params] excludeVersion = true [[main]] name = "Integrations" weight = 3 - url = "/preview/integrations/" + url = "/stable/integrations/" [main.params] excludeVersion = true @@ -208,7 +208,7 @@ [[main]] name = "FAQ" weight = 5 - url = "/preview/faq/general/" + url = "/stable/faq/general/" [main.params] excludeVersion = true @@ -216,7 +216,7 @@ name = "Releases" weight = 6 identifier = "releases" - url = "/preview/releases/" + url = "/stable/releases/" [main.params] excludeVersion = true @@ -225,7 +225,7 @@ weight = 1 identifier = "yugabytedb-releases" parent = "releases" - url = "/preview/releases/ybdb-releases/" + url = "/stable/releases/ybdb-releases/" [main.params] excludeVersion = true imageUrl = "/icons/database.svg" @@ -235,7 +235,7 @@ weight = 2 identifier = "yugabytedb-anywhere-releases" parent = "releases" - url = "/preview/releases/yba-releases/" + url = "/stable/releases/yba-releases/" [main.params] excludeVersion = true imageUrl = "/icons/server.svg" @@ -245,7 +245,7 @@ weight = 3 identifier = "yugabytedb-managed-releases" parent = "releases" - url = "/preview/yugabyte-cloud/release-notes/" + url = "/stable/yugabyte-cloud/release-notes/" [main.params] excludeVersion = true imageUrl = "/icons/cloud.svg" @@ -255,7 +255,7 @@ weight = 4 identifier = "yugabytedb-voyager-releases" parent = "releases" - url = "/preview/yugabyte-voyager/release-notes/" + url = "/stable/yugabyte-voyager/release-notes/" [main.params] excludeVersion = true imageUrl = "/images/migrate/migration-icon.svg" @@ -265,7 +265,7 @@ weight = 5 identifier = "yugabytedb-client-releases" parent = "releases" - url = "/preview/releases/yugabyte-clients/" + url = "/stable/releases/yugabyte-clients/" [main.params] excludeVersion = true imageUrl = "/icons/api.svg" @@ -275,7 +275,7 @@ weight = 6 identifier = "tech-advisories-menu" parent = "releases" - url = "/preview/releases/techadvisories/" + url = "/stable/releases/techadvisories/" [main.params] excludeVersion = true imageUrl = "/icons/triangle-exclamation-thin.svg" @@ -368,7 +368,7 @@ weight = 4 identifier = "quick-start" pre = "" - url = "/preview/quick-start-yugabytedb-managed/" + url = "/stable/quick-start-yugabytedb-managed/" [home.params] classes = "separator spacing no-transform" showSection = true @@ -406,7 +406,7 @@ weight = 12 identifier = "yugabytedb-managed" pre = "" - url = "/preview/yugabyte-cloud/" + url = "/stable/yugabyte-cloud/" [home.params] classes = "no-transform" showSection = true @@ -417,7 +417,7 @@ weight = 13 identifier = "migrate" pre = "" - url = "/preview/yugabyte-voyager/" + url = "/stable/yugabyte-voyager/" [home.params] classes = "no-transform" showSection = true @@ -444,7 +444,7 @@ [[products]] name = "YugabyteDB Aeon" weight = 4 - url = "/preview/yugabyte-cloud/" + url = 
"/stable/yugabyte-cloud/" [products.params] description = "Fully-managed cloud DBaaS" excludeVersion = true @@ -453,7 +453,7 @@ [[products]] name = "YugabyteDB Voyager" weight = 5 - url = "/preview/yugabyte-voyager/" + url = "/stable/yugabyte-voyager/" [products.params] description = "Database migration service" excludeVersion = true @@ -461,790 +461,595 @@ ########## Menus for preview -[[preview]] +[[stable]] weight = 1 identifier = "products-dropdown" - [preview.params] + [stable.params] productsDropdown = true -[[preview]] +[[stable]] name = "Overview" weight = 2 identifier = "yugabytedb" - url = "/preview/" - [preview.params] + url = "/stable/" + [stable.params] showSection = true -# [[preview]] +# [[stable]] # name = "Key benefits" # weight = 3 # identifier = "features" -# url = "/preview/features/" -# [preview.params] +# url = "/stable/features/" +# [stable.params] # showSection = true # hideChildren = true -# [[preview]] +# [[stable]] # name = "Resource guide" # weight = 4 # identifier = "get-started-guide" -# url = "/preview/get-started-guide/" -# [preview.params] +# url = "/stable/get-started-guide/" +# [stable.params] # showSection = true # hideChildren = true -[[preview]] +[[stable]] name = "Quick Start" weight = 5 identifier = "quick-start" - url = "/preview/quick-start/macos/" - [preview.params] + url = "/stable/quick-start/macos/" + [stable.params] showSection = true hideChildren = true -[[preview]] +[[stable]] name = "Explore" weight = 6 identifier = "explore" - url = "/preview/explore/" - [preview.params] + url = "/stable/explore/" + [stable.params] showSection = true -[[preview]] +[[stable]] name = "Develop" weight = 7 identifier = "develop" - url = "/preview/develop/" - [preview.params] + url = "/stable/develop/" + [stable.params] classes = "separator" showSection = true -[[preview]] +[[stable]] name = "Secure" weight = 8 identifier = "secure" - url = "/preview/secure/" - [preview.params] + url = "/stable/secure/" + [stable.params] showSection = true -[[preview]] +[[stable]] name = "Launch and Manage" weight = 9 identifier = "launch-and-manage" - url = "/preview/launch-and-manage/" - [preview.params] + url = "/stable/launch-and-manage/" + [stable.params] showSection = true -[[preview]] +[[stable]] name = "Reference" weight = 10 identifier = "reference" - [preview.params] + [stable.params] showSection = true -[[preview]] +[[stable]] name = "Benchmark" weight = 11 identifier = "benchmark" - url = "/preview/benchmark/" - [preview.params] + url = "/stable/benchmark/" + [stable.params] classes = "separator" showSection = true -[[preview]] +[[stable]] name = "Misc" identifier = "misc" parent = "reference" -[[preview]] +[[stable]] name = "Contribute" weight = 13 identifier = "contribute" - url = "/preview/contribute/" - [preview.params] + url = "/stable/contribute/" + [stable.params] showSection = true ########## Menus (in preview) for Integrations -# [[preview_integrations]] +# [[stable_integrations]] # name = "Docs Home" # weight = 1 # identifier = "home" # url = "/" -# [preview_integrations.params] +# [stable_integrations.params] # classes = "separator" # showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Integrations" weight = 2 identifier = "integrations" - url = "/preview/integrations/" - [preview_integrations.params] + url = "/stable/integrations/" + [stable_integrations.params] classes = "separator" showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Drivers and ORMs" weight = 3 - url = "/preview/drivers-orms/" - 
[preview_integrations.params] + url = "/stable/drivers-orms/" + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Schema migration" weight = 4 identifier = "schema-migration" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Data migration" weight = 5 identifier = "data-migration" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Data integration" weight = 6 identifier = "data-integration" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "GUI clients" weight = 7 identifier = "tools" - url = "/preview/integrations/tools/" - [preview_integrations.params] + url = "/stable/integrations/tools/" + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Application frameworks" weight = 8 identifier = "application-frameworks" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Development platforms" weight = 9 identifier = "development-platforms" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Data discovery" weight = 10 identifier = "data-discovery" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Security" weight = 11 identifier = "integrations-security" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Applications" weight = 12 identifier = "integrations-applications" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Infrastructure" weight = 13 identifier = "integrations-platforms" - [preview_integrations.params] + [stable_integrations.params] showSection = true -[[preview_integrations]] +[[stable_integrations]] name = "Other" weight = 14 identifier = "integrations-other" - [preview_integrations.params] + [stable_integrations.params] showSection = true ########## Menus (in preview) for Tutorials -# [[preview_tutorials]] +# [[stable_tutorials]] # name = "Docs Home" # weight = 1 # identifier = "home" # url = "/" -# [preview_tutorials.params] +# [stable_tutorials.params] # showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "Tutorials" weight = 3 identifier = "tutorials" - url = "/preview/tutorials/" - [preview_tutorials.params] + url = "/stable/tutorials/" + [stable_tutorials.params] classes = "separator" showSection = true -# [[preview_tutorials]] +# [[stable_tutorials]] # name = "Quick Start" # weight = 4 # identifier = "quick-start" -# url = "/preview/quick-start-yugabytedb-managed/" -# [preview_tutorials.params] +# url = "/stable/quick-start-yugabytedb-managed/" +# [stable_tutorials.params] # showSection = true # hideChildren = true -[[preview_tutorials]] +[[stable_tutorials]] name = "Hello world" weight = 5 identifier = "build-apps" - url = "/preview/tutorials/build-apps/" - [preview_tutorials.params] + url = "/stable/tutorials/build-apps/" + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "Build and 
learn" weight = 10 identifier = "tutorials-build-and-learn" - url = "/preview/tutorials/build-and-learn/" - [preview_tutorials.params] + url = "/stable/tutorials/build-and-learn/" + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "AI" weight = 20 identifier = "tutorials-ai" - url = "/preview/tutorials/ai/" - [preview_tutorials.params] + url = "/stable/tutorials/ai/" + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "RAG" weight = 10 identifier = "tutorials-ai-rag" parent = "tutorials-ai" - [preview_tutorials.params] + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "Vector basics" weight = 20 identifier = "tutorials-ai-vector" parent = "tutorials-ai" - [preview_tutorials.params] + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "Agentic" weight = 30 identifier = "tutorials-ai-agentic" parent = "tutorials-ai" - [preview_tutorials.params] + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "Cloud" weight = 30 identifier = "tutorials-cloud" - [preview_tutorials.params] + [stable_tutorials.params] showSection = true -[[preview_tutorials]] +[[stable_tutorials]] name = "CDC" weight = 40 identifier = "tutorials-cdc" - [preview_tutorials.params] + [stable_tutorials.params] showSection = true ########## Menus (in preview) for APIs -# [[preview_api]] +# [[stable_api]] # name = "Docs Home" # weight = 1 # identifier = "home" # url = "/" -# [preview_api.params] +# [stable_api.params] # classes = "separator" # showSection = true -[[preview_api]] +[[stable_api]] name = "API" weight = 2 identifier = "api" - url = "/preview/api/" - [preview_api.params] + url = "/stable/api/" + [stable_api.params] classes = "separator" showSection = true -[[preview_api]] +[[stable_api]] name = "YSQL" weight = 10 identifier = "api-ysql" - url = "/preview/api/ysql/" - [preview_api.params] + url = "/stable/api/ysql/" + [stable_api.params] showSection = true -[[preview_api]] +[[stable_api]] name = "Supporting language elements" weight = 50 identifier = "ysql-language-elements" parent = "api-ysql" - [preview_api.params] + [stable_api.params] showSection = true -[[preview_api]] +[[stable_api]] name = "YCQL" weight = 11 identifier = "api-cassandra" - url = "/preview/api/ycql/" - [preview_api.params] + url = "/stable/api/ycql/" + [stable_api.params] classes = "separator" showSection = true -[[preview_api]] +[[stable_api]] name = "ysqlsh" weight = 20 identifier = "ysqlsh" - url = "/preview/api/ysqlsh/" - [preview_api.params] + url = "/stable/api/ysqlsh/" + [stable_api.params] showSection = true -[[preview_api]] +[[stable_api]] name = "ycqlsh" weight = 21 identifier = "ycqlsh" - url = "/preview/api/ycqlsh/" - [preview_api.params] + url = "/stable/api/ycqlsh/" + [stable_api.params] classes = "separator" showSection = true -[[preview_api]] +[[stable_api]] name = "YugabyteDB Anywhere API" weight = 30 identifier = "yugabytedb-anywhere-api" - url = "/preview/yugabyte-platform/anywhere-automation/anywhere-api/" - [preview_api.params] + url = "/stable/yugabyte-platform/anywhere-automation/anywhere-api/" + [stable_api.params] showSection = true -[[preview_api]] +[[stable_api]] name = "YugabyteDB Aeon API" weight = 31 identifier = "yugabytedb-managed-api" - url = "/preview/yugabyte-cloud/managed-automation/managed-api/" - [preview_api.params] + url = 
"/stable/yugabyte-cloud/managed-automation/managed-api/" + [stable_api.params] showSection = true ########## Menus (in preview) for releases section -# [[preview_releases]] +# [[stable_releases]] # name = "Docs Home" # weight = 1 # identifier = "home" # url = "/" -# [preview_releases.params] +# [stable_releases.params] # classes = "separator" # showSection = true -[[preview_releases]] +[[stable_releases]] name = "Releases" weight = 1 identifier = "releases" - url = "/preview/releases/" - [preview_releases.params] + url = "/stable/releases/" + [stable_releases.params] classes = "separator" showSection = true -[[preview_releases]] +[[stable_releases]] name = "YugabyteDB" weight = 2 identifier = "ybdb-releases" - url = "/preview/releases/ybdb-releases/" - [preview_releases.params] + url = "/stable/releases/ybdb-releases/" + [stable_releases.params] showSection = true -[[preview_releases]] +[[stable_releases]] name = "End of life and older previews" parent = "ybdb-releases" weight = 9999 identifier = "end-of-life" - [preview_releases.params] + [stable_releases.params] showSection = true -[[preview_releases]] +[[stable_releases]] name = "YugabyteDB Anywhere" weight = 3 identifier = "yba-releases" - url = "/preview/releases/yba-releases/" - [preview_releases.params] + url = "/stable/releases/yba-releases/" + [stable_releases.params] showSection = true -[[preview_releases]] +[[stable_releases]] name = "YugabyteDB Aeon" weight = 4 identifier = "ybm-releases" - url = "/preview/yugabyte-cloud/release-notes/" - [preview_releases.params] + url = "/stable/yugabyte-cloud/release-notes/" + [stable_releases.params] showSection = true -[[preview_releases]] +[[stable_releases]] name = "YugabyteDB Voyager" weight = 5 identifier = "voy-releases" - url = "/preview/yugabyte-voyager/release-notes/" - [preview_releases.params] + url = "/stable/yugabyte-voyager/release-notes/" + [stable_releases.params] showSection = true imageUrl = "/images/migrate/migration-icon.svg" -[[preview_releases]] +[[stable_releases]] name = "YugabyteDB Clients" weight = 6 identifier = "yb-clients" - url = "/preview/releases/yugabyte-clients/" - [preview_releases.params] + url = "/stable/releases/yugabyte-clients/" + [stable_releases.params] classes = "separator" showSection = true -[[preview_releases]] +[[stable_releases]] name = "Versioning" weight = 10 identifier = "release-versioning" - url = "/preview/releases/versioning/" - [preview_releases.params] + url = "/stable/releases/versioning/" + [stable_releases.params] showSection = true -[[preview_releases]] +[[stable_releases]] name = "Tech Advisories" weight = 11 identifier = "tech-advisories" - url = "/preview/releases/techadvisories/" - [preview_releases.params] + url = "/stable/releases/techadvisories/" + [stable_releases.params] showSection = true ########## Menus (in preview) for FAQ section -# [[preview_faq]] +# [[stable_faq]] # name = "Docs Home" # weight = 1 # identifier = "home" # url = "/" -# [preview_faq.params] +# [stable_faq.params] # classes = "separator" # showSection = true -[[preview_faq]] +[[stable_faq]] name = "FAQ" weight = 2 identifier = "faq" - [preview_faq.params] + [stable_faq.params] classes = "separator" showSection = true hideLink = true -[[preview_faq]] +[[stable_faq]] name = "Comparisons" weight = 3 identifier = "comparisons-home" - [preview_faq.params] + [stable_faq.params] showSection = true hideLink = true ########## Menus (in preview) for YB Anywhere section -[[preview_yugabyte-platform]] +[[stable_yugabyte-platform]] weight = 1 - 
[preview_yugabyte-platform.params] + [stable_yugabyte-platform.params] productsDropdown = true -[[preview_yugabyte-platform]] +[[stable_yugabyte-platform]] name = "Overview" weight = 2 identifier = "yugabytedb-anywhere" - [preview_yugabyte-platform.params] + [stable_yugabyte-platform.params] showSection = true hideLink = true ########## Menus (in preview) for YB Managed section -[[preview_yugabyte-cloud]] +[[stable_yugabyte-cloud]] weight = 1 - [preview_yugabyte-cloud.params] + [stable_yugabyte-cloud.params] productsDropdown = true -[[preview_yugabyte-cloud]] +[[stable_yugabyte-cloud]] name = "Overview" weight = 2 identifier = "yugabytedb-managed" - [preview_yugabyte-cloud.params] + [stable_yugabyte-cloud.params] classes = "separator" showSection = true hideLink = true -# [[preview_yugabyte-cloud]] +# [[stable_yugabyte-cloud]] # name = "What's New" # weight = 4 # identifier = "yugabytedb-managed-releases" -# url = "/preview/yugabyte-cloud/release-notes/" -# [preview_yugabyte-cloud.params] +# url = "/stable/yugabyte-cloud/release-notes/" +# [stable_yugabyte-cloud.params] # showSection = true ########## Menus (in preview) for YB Voyager section -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] weight = 1 - [preview_yugabyte-voyager.params] + [stable_yugabyte-voyager.params] productsDropdown = true -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] name = "Overview" weight = 2 identifier = "yugabytedb-voyager" - [preview_yugabyte-voyager.params] + [stable_yugabyte-voyager.params] showSection = true hideLink = true -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] name = "Schema migration" parent = "yb-voyager-cli" weight = 3 identifier = "schema-migration" - [preview_yugabyte-voyager.params] + [stable_yugabyte-voyager.params] showSection = true hideLink = true -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] name = "Data migration" parent = "yb-voyager-cli" weight = 4 identifier = "data-migration" - [preview_yugabyte-voyager.params] + [stable_yugabyte-voyager.params] showSection = true hideLink = true -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] name = "Cutover and archive" parent = "yb-voyager-cli" weight = 5 identifier = "cutover-archive" - [preview_yugabyte-voyager.params] + [stable_yugabyte-voyager.params] showSection = true hideLink = true -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] name = "Bulk data load" parent = "yb-voyager-cli" weight = 6 identifier = "bulk-data-load-ref" - [preview_yugabyte-voyager.params] + [stable_yugabyte-voyager.params] showSection = true hideLink = true -[[preview_yugabyte-voyager]] +[[stable_yugabyte-voyager]] name = "Misc" identifier = "yb-voyager-misc" parent = "reference-voyager" weight = 120 - [preview_yugabyte-voyager.params] - showSection = true - hideLink = true - -########## Menus for stable - -[[stable]] - weight = 1 - [stable.params] - productsDropdown = true - -[[stable]] - name = "Overview" - weight = 2 - identifier = "yugabytedb" - url = "/stable/" - [stable.params] - showSection = true - -[[stable]] - name = "Quick Start" - weight = 4 - identifier = "quick-start" - url = "/preview/quick-start/macos/" - [stable.params] - showSection = true - hideChildren = true - -[[stable]] - name = "Explore" - weight = 5 - identifier = "explore" - url = "/stable/explore/" - [stable.params] - showSection = true - -[[stable]] - name = "Develop" - weight = 6 - identifier = "develop" - url = "/preview/develop/" - [stable.params] - classes = "separator" - showSection = true - -#[[stable]] -# name = 
"Tutorials" -# weight = 7 -# identifier = "tutorials" -# [stable.params] -# showSection = true - -# [[stable]] -# name = "Migrate" -# weight = 6 -# identifier = "migrate" -# url = "/stable/migrate/" -# [stable.params] -# showSection = true - -[[stable]] - name = "Secure" - weight = 8 - identifier = "secure" - url = "/stable/secure/" - [stable.params] - showSection = true - -[[stable]] - name = "Launch and Manage" - weight = 9 - identifier = "launch-and-manage" - url = "/stable/launch-and-manage/" - [stable.params] - showSection = true - -[[stable]] - name = "Reference" - weight = 10 - identifier = "reference" - [stable.params] - showSection = true - -[[stable]] - name = "Troubleshoot" - parent = "launch-and-manage" - url = "/preview/troubleshoot/" - -[[stable]] - name = "Benchmark" - weight = 11 - identifier = "benchmark" - url = "/stable/benchmark/" - [stable.params] - classes = "separator" - showSection = true - -[[stable]] - name = "Contribute" - weight = 13 - identifier = "contribute" - url = "/preview/contribute/" - [stable.params] - showSection = true - - -########## Menus (in stable) for YB Anywhere section - -[[stable_yugabyte-platform]] - weight = 1 - [stable_yugabyte-platform.params] - productsDropdown = true - -[[stable_yugabyte-platform]] - name = "Overview" - weight = 2 - identifier = "yugabytedb-anywhere" - [stable_yugabyte-platform.params] + [stable_yugabyte-voyager.params] showSection = true hideLink = true -########## Menus (in stable) for APIs - -# [[stable_api]] -# name = "Docs Home" -# weight = 1 -# identifier = "home" -# url = "/" -# [stable_api.params] -# classes = "separator" -# showSection = true - -[[stable_api]] - name = "API" - weight = 2 - identifier = "api" - url = "/stable/api/" - [stable_api.params] - classes = "separator" - showSection = true - -[[stable_api]] - name = "YSQL" - weight = 10 - identifier = "api-ysql" - url = "/stable/api/ysql/" - [stable_api.params] - showSection = true - -[[stable_api]] - name = "Supporting language elements" - weight = 50 - identifier = "ysql-language-elements" - parent = "api-ysql" - [stable_api.params] - showSection = true - -[[stable_api]] - name = "YCQL" - weight = 11 - identifier = "api-cassandra" - url = "/stable/api/ycql/" - [stable_api.params] - classes = "separator" - showSection = true - -[[stable_api]] - name = "ysqlsh" - weight = 20 - identifier = "ysqlsh" - url = "/stable/api/ysqlsh/" - [stable_api.params] - showSection = true - -[[stable_api]] - name = "ycqlsh" - weight = 21 - identifier = "ycqlsh" - url = "/stable/api/ycqlsh/" - [stable_api.params] - classes = "separator" - showSection = true - -[[stable_api]] - name = "YugabyteDB Anywhere API" - weight = 30 - identifier = "yugabytedb-anywhere-api" - url = "/stable/yugabyte-platform/anywhere-automation/anywhere-api/" - [stable_api.params] - showSection = true - -[[stable_api]] - name = "YugabyteDB Aeon API" - weight = 31 - identifier = "yugabytedb-managed-api" - url = "/preview/yugabyte-cloud/managed-automation/managed-api/" - [stable_api.params] - showSection = true - ########## Menus for "v2024.2" [["v2024.2"]] @@ -1264,7 +1069,7 @@ name = "Quick Start" weight = 4 identifier = "quick-start" - url = "/preview/quick-start/macos/" + url = "/stable/quick-start/macos/" ["v2024.2".params] showSection = true hideChildren = true @@ -1281,7 +1086,7 @@ name = "Develop" weight = 6 identifier = "develop" - url = "/preview/develop/" + url = "/stable/develop/" ["v2024.2".params] classes = "separator" showSection = true @@ -1312,7 +1117,7 @@ [["v2024.2"]] name = 
"Troubleshoot" parent = "launch-and-manage" - url = "/preview/troubleshoot/" + url = "/stable/troubleshoot/" [["v2024.2"]] name = "Benchmark" @@ -1327,7 +1132,7 @@ name = "Contribute" weight = 13 identifier = "contribute" - url = "/preview/contribute/" + url = "/stable/contribute/" ["v2024.2".params] showSection = true @@ -1403,7 +1208,7 @@ name = "YugabyteDB Aeon API" weight = 31 identifier = "yugabytedb-managed-api" - url = "/preview/yugabyte-cloud/managed-automation/managed-api/" + url = "/stable/yugabyte-cloud/managed-automation/managed-api/" ["v2024.2_api".params] showSection = true @@ -1426,7 +1231,7 @@ name = "Quick Start" weight = 4 identifier = "quick-start" - url = "/preview/quick-start/macos/" + url = "/stable/quick-start/macos/" ["v2024.1".params] showSection = true hideChildren = true @@ -1443,7 +1248,7 @@ name = "Develop" weight = 6 identifier = "develop" - url = "/preview/develop/" + url = "/stable/develop/" ["v2024.1".params] classes = "separator" showSection = true @@ -1474,7 +1279,7 @@ [["v2024.1"]] name = "Troubleshoot" parent = "launch-and-manage" - url = "/preview/troubleshoot/" + url = "/stable/troubleshoot/" [["v2024.1"]] name = "Benchmark" @@ -1489,7 +1294,7 @@ name = "Contribute" weight = 13 identifier = "contribute" - url = "/preview/contribute/" + url = "/stable/contribute/" ["v2024.1".params] showSection = true @@ -1566,7 +1371,7 @@ name = "YugabyteDB Aeon API" weight = 31 identifier = "yugabytedb-managed-api" - url = "/preview/yugabyte-cloud/managed-automation/managed-api/" + url = "/stable/yugabyte-cloud/managed-automation/managed-api/" ["v2024.1_api".params] showSection = true @@ -1589,7 +1394,7 @@ name = "Quick Start" weight = 4 identifier = "quick-start" - url = "/preview/quick-start-yugabytedb-managed/" + url = "/stable/quick-start-yugabytedb-managed/" ["v2025.1".params] showSection = true hideChildren = true @@ -1606,7 +1411,7 @@ name = "Develop" weight = 6 identifier = "develop" - url = "/preview/develop/" + url = "/stable/develop/" ["v2025.1".params] classes = "separator" showSection = true @@ -1637,7 +1442,7 @@ [["v2025.1"]] name = "Troubleshoot" parent = "launch-and-manage" - url = "/preview/troubleshoot/" + url = "/stable/troubleshoot/" [["v2025.1"]] name = "Benchmark" @@ -1652,7 +1457,7 @@ name = "Contribute" weight = 13 identifier = "contribute" - url = "/preview/contribute/" + url = "/stable/contribute/" ["v2025.1".params] showSection = true @@ -1728,7 +1533,7 @@ name = "YugabyteDB Aeon API" weight = 31 identifier = "yugabytedb-managed-api" - url = "/preview/yugabyte-cloud/managed-automation/managed-api/" + url = "/stable/yugabyte-cloud/managed-automation/managed-api/" ["v2025.1_api".params] showSection = true @@ -1751,7 +1556,7 @@ name = "Quick Start" weight = 3 identifier = "quick-start" - url = "/preview/quick-start/macos/" + url = "/stable/quick-start/macos/" ["v2.20".params] showSection = true hideChildren = true @@ -1768,7 +1573,7 @@ name = "Develop" weight = 6 identifier = "develop" - url = "/preview/develop/" + url = "/stable/develop/" ["v2.20".params] classes = "separator" showSection = true @@ -1799,7 +1604,7 @@ [["v2.20"]] name = "Troubleshoot" parent = "launch-and-manage" - url = "/preview/troubleshoot/" + url = "/stable/troubleshoot/" [["v2.20"]] name = "Benchmark" @@ -1814,7 +1619,7 @@ name = "Contribute" weight = 13 identifier = "contribute" - url = "/preview/contribute/" + url = "/stable/contribute/" ["v2.20".params] showSection = true @@ -1910,6 +1715,168 @@ name = "YugabyteDB Aeon API" weight = 31 identifier = 
"yugabytedb-managed-api" - url = "/preview/yugabyte-cloud/managed-automation/managed-api/" + url = "/stable/yugabyte-cloud/managed-automation/managed-api/" ["v2.20_api".params] showSection = true + +########## Menus for v2.25 + +[["v2.25"]] + weight = 1 + ["v2.25".params] + productsDropdown = true + +[["v2.25"]] + name = "Overview" + weight = 2 + identifier = "yugabytedb" + url = "/v2.25/" + ["v2.25".params] + showSection = true + +[["v2.25"]] + name = "Quick Start" + weight = 4 + identifier = "quick-start" + url = "/stable/quick-start/macos/" + ["v2.25".params] + showSection = true + hideChildren = true + +[["v2.25"]] + name = "Explore" + weight = 5 + identifier = "explore" + url = "/v2.25/explore/" + ["v2.25".params] + showSection = true + +[["v2.25"]] + name = "Develop" + weight = 6 + identifier = "develop" + url = "/stable/develop/" + ["v2.25".params] + classes = "separator" + showSection = true + +[["v2.25"]] + name = "Secure" + weight = 8 + identifier = "secure" + url = "/v2.25/secure/" + ["v2.25".params] + showSection = true + +[["v2.25"]] + name = "Launch and Manage" + weight = 9 + identifier = "launch-and-manage" + url = "/v2.25/launch-and-manage/" + ["v2.25".params] + showSection = true + +[["v2.25"]] + name = "Reference" + weight = 10 + identifier = "reference" + ["v2.25".params] + showSection = true + +[["v2.25"]] + name = "Troubleshoot" + parent = "launch-and-manage" + url = "/stable/troubleshoot/" + +[["v2.25"]] + name = "Benchmark" + weight = 11 + identifier = "benchmark" + url = "/v2.25/benchmark/" + ["v2.25".params] + classes = "separator" + showSection = true + +[["v2.25"]] + name = "Contribute" + weight = 13 + identifier = "contribute" + url = "/stable/contribute/" + ["v2.25".params] + showSection = true + +########## Menus (in "v2.25") for YB Anywhere section + +[["v2.25_yugabyte-platform"]] + weight = 1 + ["v2.25_yugabyte-platform".params] + productsDropdown = true + +[["v2.25_yugabyte-platform"]] + name = "Overview" + weight = 2 + identifier = "yugabytedb-anywhere" + ["v2.25_yugabyte-platform".params] + showSection = true + hideLink = true + +########## Menus (in v2.25) for APIs + +[["v2.25_api"]] + name = "API" + weight = 2 + identifier = "api" + url = "/v2.25/api/" + ["v2.25_api".params] + classes = "separator" + showSection = true + +[["v2.25_api"]] + name = "YSQL" + weight = 10 + identifier = "api-ysql" + url = "/v2.25/api/ysql/" + ["v2.25_api".params] + showSection = true + +[["v2.25_api"]] + name = "YCQL" + weight = 11 + identifier = "api-cassandra" + url = "/v2.25/api/ycql/" + ["v2.25_api".params] + classes = "separator" + showSection = true + +[["v2.25_api"]] + name = "ysqlsh" + weight = 20 + identifier = "ysqlsh" + url = "/v2.25/api/ysqlsh/" + ["v2.25_api".params] + showSection = true + +[["v2.25_api"]] + name = "ycqlsh" + weight = 21 + identifier = "ycqlsh" + url = "/v2.25/api/ycqlsh/" + ["v2.25_api".params] + classes = "separator" + showSection = true + +[["v2.25_api"]] + name = "YugabyteDB Anywhere API" + weight = 30 + identifier = "yugabytedb-anywhere-api" + url = "/v2.25/yugabyte-platform/anywhere-automation/anywhere-api/" + ["v2.25_api".params] + showSection = true + +[["v2.25_api"]] + name = "YugabyteDB Aeon API" + weight = 31 + identifier = "yugabytedb-managed-api" + url = "/stable/yugabyte-cloud/managed-automation/managed-api/" + ["v2.25_api".params] + showSection = true diff --git a/docs/config/_default/params.toml b/docs/config/_default/params.toml index 0006bc8ceb9c..992144e2546b 100644 --- a/docs/config/_default/params.toml +++ 
b/docs/config/_default/params.toml @@ -55,7 +55,7 @@ version_menu_pagelinks = true url = "/stable" version = "v2025.1 (STS)" [[versions]] - url = "/preview" + url = "/v2.25" version = "v2.25 (Preview)" [[versions]] url = "/v2024.2" @@ -75,7 +75,7 @@ version_menu_pagelinks = true [yb] terms_of_service = "https://www.yugabyte.com/terms-of-service/" preview_version = "v2.25" - preview_version_slug = "preview" + preview_version_slug = "v2.25" # To disable heading/title icons for particular page, define `hideHeadingIcon: true` on that page params. heading_icons = true diff --git a/docs/content/_index.md b/docs/content/_index.md index 6b331ddfd350..93b8b04d3605 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -22,7 +22,7 @@ YugabyteDB provides PostgreSQL without limits and is an excellent fit for new or title="Get Started" description="Create your first cluster and build a sample application in 15 minutes." buttonText="Get started" - buttonUrl="/preview/quick-start-yugabytedb-managed/" + buttonUrl="/stable/quick-start-yugabytedb-managed/" imageAlt="Laptop" imageUrl="/images/homepage/locally-laptop.svg" >}} @@ -30,7 +30,7 @@ YugabyteDB provides PostgreSQL without limits and is an excellent fit for new or title="Modernize and Migrate" description="Streamline all stages of bringing a source to YugabyteDB, including analysis, conversion, migration, and cutover." buttonText="Get started" - buttonUrl="/preview/yugabyte-voyager/introduction/" + buttonUrl="/stable/yugabyte-voyager/introduction/" imageAlt="Cloud" imageUrl="/images/homepage/yugabyte-in-cloud.svg" >}} {{< /sections/2-boxes >}} @@ -40,40 +40,40 @@ YugabyteDB provides PostgreSQL without limits and is an excellent fit for new or title="Explore" description="Explore YugabyteDB's support for cloud-native applications." linkText1="PostgreSQL compatibility" - linkUrl1="/preview/explore/ysql-language-features/" + linkUrl1="/stable/explore/ysql-language-features/" linkText2="Resilience" - linkUrl2="/preview/explore/fault-tolerance/" + linkUrl2="/stable/explore/fault-tolerance/" linkText3="Scalability" - linkUrl3="/preview/explore/linear-scalability/" + linkUrl3="/stable/explore/linear-scalability/" linkText4="Explore more" linkClass4="more" - linkUrl4="/preview/explore/" + linkUrl4="/stable/explore/" >}} {{< sections/3-box-card title="Develop" description="Build global applications using familiar APIs and drivers." linkText1="Global applications" - linkUrl1="/preview/develop/build-global-apps/" + linkUrl1="/stable/develop/build-global-apps/" linkText2="Hybrid and multi-cloud" - linkUrl2="/preview/develop/multi-cloud/" + linkUrl2="/stable/develop/multi-cloud/" linkText3="Drivers and ORMs" - linkUrl3="/preview/drivers-orms/" + linkUrl3="/stable/drivers-orms/" linkText4="Explore more" linkClass4="more" - linkUrl4="/preview/develop/" + linkUrl4="/stable/develop/" >}} {{< sections/3-box-card title="Under the hood" description="Learn about YugabyteDB's modern architecture." 
linkText1="Query layer" - linkUrl1="/preview/architecture/query-layer/" + linkUrl1="/stable/architecture/query-layer/" linkText2="Storage layer" - linkUrl2="/preview/architecture/docdb/" + linkUrl2="/stable/architecture/docdb/" linkText3="Transactions" - linkUrl3="/preview/architecture/transactions/" + linkUrl3="/stable/architecture/transactions/" linkText4="Explore more" linkClass4="more" - linkUrl4="/preview/architecture/" + linkUrl4="/stable/architecture/" >}} {{< /sections/3-boxes >}} diff --git a/docs/content/preview/_index.md b/docs/content/preview/_index.md deleted file mode 100644 index 9b857e905401..000000000000 --- a/docs/content/preview/_index.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -title: YugabyteDB -description: YugabyteDB documentation is the best source to learn the most in-depth information about the YugabyteDB database, YugabyteDB Aeon, and YugabyteDB Anywhere. -headcontent: Open source cloud-native distributed SQL database -weight: 1 -type: indexpage -breadcrumbDisable: true ---- - -YugabyteDB is an open source PostgreSQL-compatible distributed database for cloud native apps. Resilient, scalable, and flexible, it can be deployed across public and private clouds as well as in Kubernetes environments. - -{{< sections/2-boxes >}} - {{< sections/bottom-image-box - title="Get started locally on your laptop" - description="Download and install YugabyteDB on your laptop, create a cluster, and build a sample application." - buttonText="Quick Start" - buttonUrl="/preview/quick-start/macos/" - imageAlt="Locally Laptop" imageUrl="/images/homepage/locally-laptop.svg" - >}} - - {{< sections/bottom-image-box - title="Explore distributed SQL" - description="Explore the features of distributed SQL, with examples." - buttonText="Explore" - buttonUrl="/preview/explore/" - imageAlt="Yugabyte cloud" imageUrl="/images/homepage/yugabyte-in-cloud.svg" - >}} -{{< /sections/2-boxes >}} - -## Develop for YugabyteDB - -{{< sections/3-boxes>}} - {{< sections/3-box-card - title="Build a Hello World application" - description="Use your favorite programming language to build an application that connects to a YugabyteDB cluster." - buttonText="Build" - buttonUrl="/preview/tutorials/build-apps/" - >}} - - {{< sections/3-box-card - title="Connect using drivers and ORMs" - description="Connect applications to your database using familiar third-party divers and ORMs and YugabyteDB Smart Drivers." - buttonText="Connect" - buttonUrl="/preview/drivers-orms/" - >}} - - {{< sections/3-box-card - title="Use familiar APIs" - description="Get up to speed quickly using YugabyteDB's PostgreSQL-compatible YSQL and Cassandra-based YCQL APIs." - buttonText="Develop" - buttonUrl="/preview/api/" - >}} - -{{< /sections/3-boxes >}} - -## Get under the hood - -{{< sections/3-boxes>}} - {{< sections/3-box-card - title="Architecture" - description="Learn how YugabyteDB achieves consistency and high availability." - buttonText="Learn More" - buttonUrl="/preview/architecture/" - >}} - - {{< sections/3-box-card - title="Secure" - description="Secure YugabyteDB with authentication, authorization, and encryption." - buttonText="Secure" - buttonUrl="/preview/secure/" - >}} - - {{< sections/3-box-card - title="Configure" - description="Configure core database services." 
- buttonText="Configure" - buttonUrl="/preview/reference/configuration/" - >}} - -{{< /sections/3-boxes >}} diff --git a/docs/content/preview/additional-features/_index.md b/docs/content/preview/additional-features/_index.md deleted file mode 100644 index 8488da6f8372..000000000000 --- a/docs/content/preview/additional-features/_index.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Advanced capabilities -headerTitle: Advanced capabilities -linkTitle: Advanced capabilities -description: How to deploy advanced capabilities for your YugabyteDB deployment. -headcontent: Deploy and manage advanced capabilities for your YugabyteDB universe -menu: - preview: - identifier: additional-features - parent: launch-and-manage - weight: 40 -type: indexpage ---- - -{{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/additional-features/change-data-capture/_index.md b/docs/content/preview/additional-features/change-data-capture/_index.md deleted file mode 100644 index c1aad68d43a8..000000000000 --- a/docs/content/preview/additional-features/change-data-capture/_index.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: Change data capture (CDC) -headerTitle: Change data capture (CDC) -linkTitle: Change data capture -description: CDC or Change data capture is a process to capture changes made to data in the database. -headcontent: Capture changes made to data in the database -tags: - feature: early-access -aliases: - - /preview/develop/change-data-capture/ -menu: - preview: - identifier: explore-change-data-capture - parent: additional-features - weight: 30 -type: indexpage ---- - -Change data capture (CDC) is used to determine and track the data that has changed so that action can be taken using the changed data. CDC is used in a number of scenarios: - -- **Microservice-oriented architectures**: Some microservices require a stream of changes to the data, and using CDC in YugabyteDB can provide consumable data changes to CDC subscribers. - -- **Asynchronous replication to remote systems**: Remote systems may subscribe to a stream of data changes and then transform and consume the changes. Maintaining separate database instances for transactional and reporting purposes can be used to manage workload performance. - -- **Multiple data center strategies**: Maintaining multiple data centers enables enterprises to provide high availability (HA). - -- **Compliance and auditing**: Auditing and compliance requirements can require you to use CDC to maintain records of data changes. - -YugabyteDB supports the following methods for reading change events. - -## PostgreSQL Replication Protocol - -This method uses the [PostgreSQL replication protocol](using-logical-replication/key-concepts/#replication-protocols), ensuring compatibility with PostgreSQL CDC systems. Logical replication operates through a publish-subscribe model. It replicates data objects and their changes based on the replication identity. - -It works as follows: - -1. Create Publications in the YugabyteDB cluster similar to PostgreSQL. -1. Deploy the YugabyteDB Connector in your preferred Kafka Connect environment. -1. The connector uses replication slots to capture change events and publishes them directly to a Kafka topic. - -{{}} -Learn about CDC in YugabyteDB using the [PostgreSQL Replication Protocol](./using-logical-replication/). -{{}} - -## YugabyteDB gRPC Replication Protocol - -This method involves setting up a change stream in YugabyteDB that uses the native gRPC replication protocol to publish change events. 
- -It works as follows: - -1. Establish a change stream in the YugabyteDB cluster using the yb_admin CLI commands. -1. Deploy the YugabyteDB gRPC Connector in your preferred Kafka Connect environment. -1. The connector captures change events using YugabyteDB's native gRPC replication and directly publishes them to a Kafka topic. - -{{}} -Learn about CDC in YugabyteDB using the [gRPC Replication Protocol](./using-yugabytedb-grpc-replication/). -{{}} diff --git a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/_index.md b/docs/content/preview/additional-features/change-data-capture/using-logical-replication/_index.md deleted file mode 100644 index 2e9e4eefa457..000000000000 --- a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/_index.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -title: CDC using PostgreSQL replication protocol -headerTitle: CDC using PostgreSQL replication protocol -linkTitle: PostgreSQL protocol -description: CDC using YugabyteDB PostgreSQL replication protocol. -headcontent: Capture changes made to data in the database -tags: - feature: early-access -aliases: - - /preview/explore/change-data-capture/using-logical-replication/ -menu: - preview: - identifier: explore-change-data-capture-logical-replication - parent: explore-change-data-capture - weight: 240 -type: indexpage -showRightNav: true ---- - -## Overview - -YugabyteDB CDC captures changes made to data in the database and streams those changes to external processes, applications, or other databases. CDC allows you to track and propagate changes in a YugabyteDB database to downstream consumers based on its Write-Ahead Log (WAL). YugabyteDB CDC captures row-level changes resulting from INSERT, UPDATE, and DELETE operations in the configured database and publishes it further to be consumed by downstream applications. - -### Highlights - -#### Resilience - -YugabyteDB CDC with PostgreSQL Logical Replication provides resilience as follows: - -1. Following a failure of the application, server, or network, the replication can continue from any of the available server nodes. - -2. Replication continues from the transaction immediately after the transaction that was last acknowledged by the application. No transactions are missed by the application. - -#### Security - -Because YugabyteDB is using the PostgreSQL Logical Replication model, the following applies: - -- The CDC user persona will be a PostgreSQL replication client. - -- A standard replication connection is used for consumption, and all the server-side configurations for authentication, authorizations, SSL modes, and connection load balancing can be leveraged automatically. - -#### Guarantees - -CDC in YugabyteDB provides the following guarantees. - -| GUARANTEE | DESCRIPTION | -| :----- | :----- | -| Per-slot ordered delivery guarantee | Changes from transactions from all the tables that are part of the replication slot's publication are received in the order they were committed. This also implies ordered delivery across all the tablets that are part of the publication's table list. | -| At least once delivery | Changes from transactions are streamed at least once. Changes from transactions may be streamed again in case of restart after failure. For example, this can happen in the case of a Kafka Connect node failure. If the Kafka Connect node pushes the records to Kafka and crashes before committing the offset, it will again get the same set of records upon restart. 
| -| No gaps in change stream | Receiving changes that are part of a transaction with commit time *t* implies that you have already received changes from all transactions with commit time lower than *t*. Thus, receiving any change for a row with commit timestamp *t* implies that you have received all older changes for that row. | - -## Key concepts - -The YugabyteDB logical replication feature makes use of PostgreSQL concepts like replication slot, publication, replica identity, and so on. Understanding these key concepts is crucial for setting up and managing a logical replication environment effectively. - -{{}} -Review [key concepts](./key-concepts) of YugabyteDB CDC with logical replication. -{{}} - -## Getting started - -Get started with YugabyteDB logical replication using the YugabyteDB Connector. - -{{}} -[Get started](./get-started) using the connector. -{{}} - -## Monitoring - -You can monitor the activities and status of the deployed connectors using the http end points provided by YugabyteDB. - -{{}} -Learn how to [monitor](./monitor/) your CDC setup. -{{}} - -## YugabyteDB Connector - -To capture and stream your changes in YugabyteDB to an external system, you need a connector that can read the changes in YugabyteDB and stream it out. For this, you can use the YugabyteDB Connector, which is based on the Debezium platform. The connector is deployed as a set of Kafka Connect-compatible connectors, so you first need to define a YugabyteDB connector configuration and then start the connector by adding it to Kafka Connect. - -{{}} -For reference documentation, see [YugabyteDB Connector](./yugabytedb-connector/). -{{}} - -## Limitations - -- Log Sequence Number ([LSN](../using-logical-replication/key-concepts/#lsn-type)) Comparisons Across Slots. - - In the case of YugabyteDB, the LSN  does not represent the byte offset of a WAL record. Hence, arithmetic on LSN and any other usages of the LSN making this assumption will not work. Also, currently, comparison of LSN values from messages coming from different replication slots is not supported. - -- The following functions are currently unsupported: - - - `pg_current_wal_lsn` - - `pg_wal_lsn_diff` - - `IDENTIFY SYSTEM` - - `txid_current` - - `pg_stat_replication` - - Additionally, the functions responsible for pulling changes instead of the server streaming it are unsupported as well. They are described in [Replication Functions](https://www.postgresql.org/docs/15/functions-admin.html#FUNCTIONS-REPLICATION) in the PostgreSQL documentation. - -- Restriction on DDLs - - DDL operations should not be performed from the time of replication slot creation till the start of snapshot consumption of the last table. - -- There should be a primary key on the table you want to stream the changes from. - -- CDC is not supported on tables that are also the target of xCluster replication (see issue {{}}). However, both CDC and xCluster can work simultaneously on the same source tables. - - When performing [switchover](../../../deploy/multi-dc/async-replication/async-transactional-switchover/) or [failover](../../../deploy/multi-dc/async-replication/async-transactional-failover/) on xCluster, if you are using CDC, remember to also reconfigure CDC to use the new primary universe. 
- -- Currently, CDC doesn't support schema evolution for changes that require table rewrites (for example, [ALTER TYPE](../../../api/ysql/the-sql-language/statements/ddl_alter_table/#alter-type-with-table-rewrite)), or DROP TABLE and TRUNCATE TABLE operations after the replication slot is created. However, you can perform these operations before creating the replication slot without any issues. - -- YCQL tables aren't currently supported. Issue {{}}. - -- Support for point-in-time recovery (PITR) is tracked in issue {{}}. - -- Support for transaction savepoints is tracked in issue {{}}. - -- Support for enabling CDC on Read Replicas is tracked in issue {{}}. - -- Support for tablet splitting with logical replication is disabled from v2024.1.4 and v2024.2.1. Tracked in issue {{}}. - -- A replication slot should be consumed by at most one consumer at a time. However, there is currently no locking mechanism to enforce this. As a result, you should ensure that multiple consumers do not consume from a slot simultaneously. Tracked in issue {{}}. - -- If a row is updated or deleted in the same transaction in which it was inserted, CDC cannot retrieve the before-image values for the UPDATE / DELETE event. If the replica identity is not CHANGE, then CDC will throw an error while processing such events. - - To handle updates/deletes with a non-CHANGE replica identity, set the YB-TServer flag `cdc_send_null_before_image_if_not_exists` to true. With this flag enabled, CDC will send a null before-image instead of failing with an error. diff --git a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/best-practices.md b/docs/content/preview/additional-features/change-data-capture/using-logical-replication/best-practices.md deleted file mode 100644 index 90edd1382272..000000000000 --- a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/best-practices.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: Best Practices for logical replication -headerTitle: Best practices -linkTitle: Best practices -description: Best Practices for logical replication with Change Data Capture in YugabyteDB. -aliases: - - /preview/explore/change-data-capture/using-logical-replication/best-practices/ -menu: - preview: - parent: explore-change-data-capture-logical-replication - identifier: best-practices-cdc - weight: 60 -type: docs ---- - -This section describes best practices to achieve scalability and performance while using CDC with logical replication. - -## Parallel consumption - -The recommended approach towards addressing the requirement of consuming changes in parallel from different tables is to use multiple replication slots. One replication slot per table could be used. Each replication slot is independent of the other and the changes from the tables can be consumed in parallel. - -## Fan out - -Consider the requirement where there are multiple applications, all of them requiring to consume changes from the same table. The recommended approach to address this requirement is to use one replication slot to consume the changes from the table and write the changes to a system like Kafka. The fan out can then be implemented with the multiple applications consuming from Kafka. - -## Load balancing consumption - -An application can connect to any of the YB-TServer nodes to consume from a replication slot. 
Furthermore, even in case of an interruption, a fresh connection can be made to a different node (different from the node from which consumption was previously happening) to continue consumption from the same replication slot. - -When there are multiple consuming applications each consuming from a different replication slot, it is best that the applications connect to different YB-TServer nodes in the cluster. This ensures better load balancing. The [YugabyteDB smart driver](../../../../drivers-orms/smart-drivers/) does this automatically, so it is recommended that applications use this smart driver. diff --git a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/key-concepts.md b/docs/content/preview/additional-features/change-data-capture/using-logical-replication/key-concepts.md deleted file mode 100644 index d94ec8bb66e2..000000000000 --- a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/key-concepts.md +++ /dev/null @@ -1,136 +0,0 @@ ---- -title: Key concepts - logical replication -headerTitle: Key concepts -linkTitle: Key concepts -description: Change Data Capture in YugabyteDB. -headcontent: PostgreSQL logical replication concepts -aliases: - - /preview/explore/change-data-capture/using-logical-replication/key-concepts/ -menu: - preview: - parent: explore-change-data-capture-logical-replication - identifier: key-concepts - weight: 10 -type: docs ---- - -The YugabyteDB logical replication feature uses [PostgreSQL Logical Replication](https://www.postgresql.org/docs/15/logical-replication.html), which operates using a publish-subscribe model. Understanding the following key concepts will help you set up and manage a logical replication environment effectively. - -## Concepts - -### Replication slot - -A replication slot represents a stream of changes that can be replayed to a client in the order they were made on the origin server. Each slot streams a sequence of changes from a single database. - -In logical replication, the fundamental unit of data transmission is a transaction. A logical slot emits each change just once in normal operation. The current position of each slot is persisted only at checkpoint, so if a replication process is interrupted and restarts, even if the checkpoint or the starting Log Sequence Number ([LSN](#lsn-type)) falls in the middle of a transaction, **the entire transaction is retransmitted**. This behavior guarantees that clients receive complete transactions without missing any intermediate changes, maintaining data integrity across the replication stream​. Logical decoding clients are responsible for avoiding ill effects from handling the same message more than once. Clients may wish to record the last LSN they saw when decoding and skip over any repeated data or (when using the replication protocol) request that decoding start from that LSN rather than letting the server determine the start point. - -For more information, refer to [Replication slots](https://www.postgresql.org/docs/15/logicaldecoding-explanation.html#LOGICALDECODING-REPLICATION-SLOTS) in the PostgreSQL documentation. - -#### LSN type - -A Log Sequence Number (LSN) in YugabyteDB differs from what you may be accustomed to in PostgreSQL. In PostgreSQL, an LSN represents a specific 'location' in the WAL, and has significance that spans databases and replication slots. In YugabyteDB, an LSN uniquely identifies a change event, and the LSN is valid only in the context of a specific replication slot. 
Due to these differences, there are inherent limitations in how LSNs can be used. - -You can specify the type of LSN to use when you create a replication slot. YugabyteDB currently supports the following types: - -- SEQUENCE - (Default) PostgreSQL-style LSN that is valid in the context of a slot. It is a monotonic increasing number that determines the record in global order in the context of a slot. It can't be compared across two different slots. -- HYBRID_TIME - A hybrid time value which can be used natively with YugabyteDB. HYBRID_TIME is denoted by the HybridTime of the transaction commit record. All the records of the transaction that is streamed will have the same LSN as that of the commit record. You need to ensure that the changes of a transaction are applied in totality and the acknowledgement is sent only if the commit record of a transaction is processed. - -### Publication - -A publication is a set of changes generated from a table or a group of tables, and might also be described as a change set or replication set. Each publication exists in only one database. - -Publications are different from schemas and do not affect how the table is accessed. Each table can be added to multiple publications if needed. Publications may currently only contain tables. Objects must be added explicitly, except when a publication is created for ALL TABLES. - -For more information, refer to [Publication](https://www.postgresql.org/docs/15/logical-replication-publication.html#LOGICAL-REPLICATION-PUBLICATION) in the PostgreSQL documentation. - -### Output plugin - -Output plugins transform the data from the write-ahead log's internal representation into the format that can be consumed by replication clients. These plugins are notified about the change events that need to be processed and sent via various callbacks. These callbacks are only invoked when the transaction actually commits. - -YugabyteDB supports the following four output plugins: - -- `yboutput` -- `pgoutput` -- `test_decoding` -- `wal2json` - -All these plugins are pre-packaged with YugabyteDB and do not require any external installation. - -{{< note title="Note" >}} - -The plugin `yboutput` is YugabyteDB specific. It is similar to `pgoutput` in most aspects. The only difference being that replica identity `CHANGE` is not supported in `pgoutput`. All other plugins support replica identity `CHANGE`. - -{{}} - -For more information, refer to [Logical Decoding Output Plugins](https://www.postgresql.org/docs/15/logicaldecoding-output-plugin.html) in the PostgreSQL documentation. - -### LSN - -LSN (Log Sequence Number) in YugabyteDB is an unsigned 64-bit integer that uniquely identifies a change record or a transaction boundary record that is consumed from a given replication slot. - -In YugabyteDB, LSN values from different slots are considered unrelated and should not be compared. In YugabyteDB, LSN no longer represents the byte offset of a WAL record. - -LSN values for a single replication slot satisfy the following properties: - -- **Uniqueness** - - LSN values for the change and `COMMIT` records for a given replication slot are unique. In particular, changes from different tablets of the same or different tables will have unique LSN values for a replication slot. - -- **Ordering** - - LSN values can be compared ( `<`, `>`, `=` ). - - The LSN of the change records in a transaction will be strictly lower than the LSN of the COMMIT record of the same transaction. 
- - The LSNs of change records in a transaction will be in increasing order and will correspond to the order in which those changes were made in that transaction. That is, the LSN of an earlier change will have a strictly lower value than the LSN of a later change in the same transaction. This is the case even if the changes correspond to rows in different tablets of the same or different tables. - - For a given replication slot, the LSN of a `COMMIT` record of an earlier transaction will be strictly lower than the LSN of the `COMMIT` record of a later transaction. - -- **Determinism** - - For a given replication slot, the LSN value of a change record (or a transaction boundary record) remains the same for the lifetime of that replication slot. In particular, this is true across server and client restarts and client re-connections. Thus, LSN values for a single replication slot may be used to uniquely identify records that are consumed from that replication slot. The values can be compared for determining duplicates at the client side. - -### Replica identity - -Replica identity is a table-level parameter that controls the amount of information being written to the change records. YugabyteDB supports the following four replica identities: - -- CHANGE (default) -- DEFAULT -- FULL -- NOTHING - -The PostgreSQL replica identity `INDEX` is not supported in YugabyteDB. - -Replica identity `CHANGE` is the best performant and the default replica identity. The replica identity of a table can be changed by performing an ALTER TABLE. However, for a given slot, any ALTER TABLE performed to change the replica identity after the creation of the slot will have no effect. This means that the effective replica identity for any table for a slot, is the replica identity of the table that existed at the time of slot creation. A dynamically created table (a table created after slot creation) will have the default replica identity. For a replica identity modified after slot creation to take effect, a new slot will have to be created after performing the ALTER TABLE. - -The [ysql_yb_default_replica_identity](../../../../reference/configuration/yb-tserver/#ysql-yb-default-replica-identity) flag determines the default replica identity for user tables at the time of table creation. The default value is `CHANGE`. The purpose of this flag is to set the replica identities for dynamically created tables. In order to create a dynamic table with desired replica identity, the flag must be set accordingly and then the table must be created. - -{{< note title="Advisory" >}} -You should refrain from altering the replica identity of a dynamically created table for at least 5 minutes after its creation. -{{< /note >}} - -For more information, refer to [Replica Identity](../yugabytedb-connector/#replica-identity). - -For information on replica identity in PostgreSQL, refer to [REPLICA IDENTITY](https://www.postgresql.org/docs/15/sql-altertable.html#SQL-ALTERTABLE-REPLICA-IDENTITY) in the PostgreSQL documentation. - -### Replication protocols - -PostgreSQL has defined protocols for replication that need to be followed by clients to establish replication connection as well as message structures for streaming data. This includes the [Streaming Replication protocol](https://www.postgresql.org/docs/15/protocol-replication.html) and the [Logical Streaming Replication protocol](https://www.postgresql.org/docs/15/protocol-logical-replication.html). - -The logical streaming replication protocol sends individual transactions one-by-one. 
This means that all messages between a pair of `BEGIN` and `COMMIT` messages belong to the same transaction. - -YugabyteDB supports both the streaming replication protocols used in PostgreSQL to support logical replication, maintaining the same semantics described in PostgreSQL: - -- Streaming Replication Protocol - This protocol is followed by all output plugins. - -- Logical Streaming Replication Protocol - This protocol is followed by `pgoutput` and `yboutput`, in addition to the Streaming replication protocol. - -{{< note title="Note" >}} - -YugabyteDB does not support Physical Replication. - -{{< /note >}} - -## Learn more - -[CDC using Logical Replication architecture](../../../../architecture/docdb-replication/cdc-logical-replication/) diff --git a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/monitor.md b/docs/content/preview/additional-features/change-data-capture/using-logical-replication/monitor.md deleted file mode 100644 index 4702b6bc4b6e..000000000000 --- a/docs/content/preview/additional-features/change-data-capture/using-logical-replication/monitor.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: CDC monitoring in YugabyteDB -headerTitle: Monitor -linkTitle: Monitor -description: Monitor Change Data Capture in YugabyteDB. -aliases: - - /preview/explore/change-data-capture/using-logical-replication/monitor/ -menu: - preview: - parent: explore-change-data-capture-logical-replication - identifier: monitor - weight: 30 -type: docs ---- - -## Catalog objects and views - -### pg_publication - -Contains all publication objects contained in the database. - -| Column name | Data type | Description | -| :----- | :----- | :----- | -| oid | oid | Row identifier | -| pubname | name | Name of the publication | -| pubowner | oid | OID of the owner. | -| puballtables | bool | If true, this publication includes all tables in the database including those added in the future. | -| pubinsert | bool | If true, INSERT operations are replicated for tables in the publication. | -| pubupdate | bool | If true, UPDATE operations are replicated for tables in the publication. | -| pubdelete | bool | If true, DELETE operations are replicated for tables in the publication. | -| pubtruncate | bool | If true, TRUNCATE operations are replicated for tables in the publication. | - -### pg_publication_rel - -Contains mapping between publications and tables. This is a many-to-many mapping. - -| Column name | Data type | Description | -| :----- | :----- | :----- | -| oid | oid | Row identifier. | -| prpubid | oid | OID of the publication. References pg_publication.oid. | -| prrelid| oid | OID of the relation. References pg_class.oid. | - -### pg_publication_tables - -Contains mapping between publications and tables. It is a wrapper over `pg_publication_rel` as it expands the publications defined as FOR ALL TABLES, so for such publications there will be a row for each eligible table. - -| Column name | Data type | Description | -| :----- | :----- | :----- | -| pubname | name | Name of publication. | -| schemaname | name | Name of schema containing table. | -| tablename | name | Name of table. | - -### pg_replication_slots - -Provides a list of all replication slots that currently exist on the database cluster, along with their metadata. - -| Column name | Data type | Description | -| :----- | :----- | :----- | -| slot_name | name | Name of the replication slot. | -| plugin | name | Output plugin name. | -| slot_type | text | Always logical. 
| -| datoid | oid | The OID of the database this slot is associated with. | -| database | text | The name of the database this slot is associated with. | -| temporary | boolean | True if this is a temporary replication slot. Temporary slots are automatically dropped on error or when the session has finished. | -| active | boolean | True if this slot is currently actively being used. In YSQL, an "active" replication slot means a slot which has been consumed at least once in a certain time frame. The time is defined using the `ysql_cdc_active_replication_slot_window_ms` flag, which has a default of 5 minutes. | -| active_pid | integer | The process ID of the session using this slot if the slot is currently actively being used. `NULL` if no replication process is ongoing. | -| xmin | xid | The oldest transaction that this slot needs the database to retain. | -| catalog_xmin | xid | Not applicable for YSQL. Always set to xmin. | -| restart_lsn | pg_lsn | The Log Sequence Number ([LSN](../key-concepts/#lsn-type)) of the oldest change record which still might be required by the consumer of this slot and thus won't be automatically removed during checkpoints. | -| confirmed_flush_lsn | pg_lsn | The LSN up to which the logical slot's consumer has confirmed receiving data. Data older than this is not available anymore. Transactions with commit LSN lower than the `confirmed_flush_lsn` are not available anymore. | -| yb_stream_id | text | UUID of the CDC stream | -| yb_restart_commit_ht | int8 | A uint64 representation of the commit Hybrid Time corresponding to the `restart_lsn`. This can be used by the client (like YugabyteDB connector) to perform a consistent snapshot (as of the `consistent_point`) in the case when a replication slot already exists. | - -### pg_stat_replication - -Displays information about active WAL senders, providing insights into the state of replication for each connected standby or logical replication client. - -| Column name | Data type | Description | -| :----- | :----- | :----- | -| pid | integer | Process ID of WAL sender process. | -| usesysid | oid | OID of the user logged into this WAL sender process. | -| usename | name | Name of the user logged into this WAL sender process. | -| application_name | text | Name of the application that is connected to this WAL sender. | -| client_addr | inet | IP address of the client connected to this WAL sender. If this field is null, it indicates that the client is connected via a Unix socket on the server machine. | -| client_hostname | text | Host name of the connected client, as reported by a reverse DNS lookup of client_addr. This field will only be non-null for IP connections, and only when the [log_hostname](https://www.postgresql.org/docs/15/runtime-config-logging.html#GUC-LOG-HOSTNAME) configuration parameter is enabled. | -| client_port | integer | TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used. | -| backend_start | timestamp with time zone | Time when this process was started (that is, when the client connected to this WAL sender). | -| backend_xmin | xid | The oldest transaction the client is interested in. | -| state | text | Current WAL sender state. Always `streaming`. | -| sent_lsn | pg_lsn | Last write-ahead log location sent on this connection. | -| write_lsn | pg_lsn | The last LSN acknowledged by the logical replication client. | -| flush_lsn | pg_lsn | Same as `write_lsn`. | -| replay_lsn | pg_lsn | Same as `write_lsn`. 
| -| write_lag | interval | The difference between the timestamp of the latest record in WAL and the timestamp of the last acknowledged record. Since YugabyteDB does not differentiate between write, flush, or replay, this value is the same for all three lag metrics. | -| flush_lag | interval | Same as `write_lag`. | -| replay_lag | interval | Same as `write_lag`. | -| sync_priority | integer | Synchronous state of this standby server. Always 0, as logical replication only supports asynchronous replication. | -| sync_state | text | Synchronous state of this standby server. Always `async`. | -| reply_time | timestamp with time zone | Timestamp of the last reply message received from the client. | - -## CDC service metrics - -Provide information about the CDC service in YugabyteDB. - -| Metric name | Type | Description | -| :---- | :---- | :---- | -| cdcsdk_change_event_count | `long` | The number of records sent by the CDC Service. | -| cdcsdk_traffic_sent | `long` | Total traffic sent, in bytes. | -| cdcsdk_sent_lag_micros | `long` | This lag metric is calculated by subtracting the timestamp of the latest record in the WAL of a tablet from the last record sent to the CDC connector. | -| cdcsdk_expiry_time_ms | `long` | The time left to read records from WAL is tracked by the Stream Expiry Time (ms). | -| cdcsdk_flush_lag | `long` | This lag metric shows the difference between the timestamp of the latest record in the WAL and the replication slot's restart time.| - -CDC service metrics are only calculated for tablets that are of interest for a replication slot. By default, tablets are considered to be of interest if they are polled at least once in 4 hours. You can configure the frequency using the [cdcsdk_tablet_not_of_interest_timeout_secs](../../../../reference/configuration/yb-tserver/#cdcsdk-tablet-not-of-interest-timeout-secs) YB-TServer flag. Metrics are calculated considering unpolled tablets until this timeout elapses. - -## Connector metrics - - - -Refer to [Monitoring](../yugabytedb-connector/#monitoring) for information on YugabyteDB connector metrics. diff --git a/docs/content/preview/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md b/docs/content/preview/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md deleted file mode 100644 index f09ab349580f..000000000000 --- a/docs/content/preview/additional-features/change-data-capture/using-yugabytedb-grpc-replication/_index.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -title: CDC using YugabyteDB gRPC replication protocol -headerTitle: CDC using gRPC replication protocol -linkTitle: gRPC protocol -description: CDC using YugabyteDB gRPC replication protocol. -headcontent: Capture changes made to data in the database -tags: - feature: early-access -aliases: - - /preview/explore/change-data-capture/cdc-overview/ - - /preview/explore/change-data-capture/using-yugabytedb-grpc-replication/ -menu: - preview: - identifier: explore-change-data-capture-grpc-replication - parent: explore-change-data-capture - weight: 280 -type: indexpage -showRightNav: true ---- - -YugabyteDB CDC captures changes made to data in the database and streams those changes to external processes, applications, or other databases. CDC allows you to track and propagate changes in a YugabyteDB database to downstream consumers based on its Write-Ahead Log (WAL). 
YugabyteDB CDC uses Debezium to capture row-level changes resulting from INSERT, UPDATE, and DELETE operations in the upstream database, and publishes them as events to Kafka using Kafka Connect-compatible connectors. - -![What is CDC](/images/explore/cdc-overview-work.png) - - - -## Get started - -Get started with Yugabyte gRPC replication. - -For tutorials on streaming data to Kafka environments, including Amazon MSK, Azure Event Hubs, and Confluent Cloud, see [Kafka environments](/preview/tutorials/cdc-tutorials/). - -{{}} -[Get started](./cdc-get-started) using the connector. -{{}} - -## Monitoring - -You can monitor the activities and status of the deployed connectors using the http end points provided by YugabyteDB. - -{{}} -Learn how to [monitor](./cdc-monitor/) your CDC setup. -{{}} - -## YugabyteDB gRPC Connector - -To capture and stream your changes in YugabyteDB to an external system, you need a connector that can read the changes in YugabyteDB and stream it out. For this, you can use the YugabyteDB gRPC connector, which is based on the Debezium platform. The connector is deployed as a set of Kafka Connect-compatible connectors, so you first need to define a YugabyteDB connector configuration and then start the connector by adding it to Kafka Connect. - -{{}} -For reference documentation, see [YugabyteDB gRPC Connector](./debezium-connector-yugabytedb/). -{{}} - -## Known limitations - -* A single stream can only be used to stream data from one namespace only. -* There should be a primary key on the table you want to stream the changes from. -* CDC is not supported on tables that are also the target of xCluster replication (see issue {{}}). However, both CDC and xCluster can work simultaneously on the same source tables. - - When performing [switchover](../../../deploy/multi-dc/async-replication/async-transactional-switchover/) or [failover](../../../deploy/multi-dc/async-replication/async-transactional-failover/) on xCluster, if you are using CDC, remember to also reconfigure CDC to use the new primary universe. - -* Currently, CDC doesn't support schema evolution for changes that require table rewrites (for example, [ALTER TYPE](../../../api/ysql/the-sql-language/statements/ddl_alter_table/#alter-type-with-table-rewrite)), or DROP TABLE and TRUNCATE TABLE operations. -* YCQL tables aren't currently supported. Issue {{}}. -* [Composite types](../../../explore/ysql-language-features/data-types#composite-types) are currently not supported. Issue {{}}. - -* If a row is updated or deleted in the same transaction in which it was inserted, CDC cannot retrieve the before-image values for the UPDATE / DELETE event. If the replica identity is not CHANGE, then CDC will throw an error while processing such events. - - To handle updates/deletes with a non-CHANGE replica identity, set the YB-TServer flag `cdc_send_null_before_image_if_not_exists` to true. With this flag enabled, CDC will send a null before-image instead of failing with an error. - -In addition, CDC support for the following features will be added in upcoming releases: - -* Support for point-in-time recovery (PITR) is tracked in issue {{}}. -* Support for transaction savepoints is tracked in issue {{}}. -* Support for enabling CDC on Read Replicas is tracked in issue {{}}. -* Support for schema evolution with before image is tracked in issue {{}}. 
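For the before-image limitation described above, the following is a minimal sketch of enabling the workaround flag when starting a YB-TServer process; the master address and data directory shown are placeholders for illustration only.

```bash
# Sketch only: enable the workaround flag at YB-TServer startup.
# Addresses and paths are placeholders; use the values for your deployment.
./bin/yb-tserver \
  --tserver_master_addrs=127.0.0.1:7100 \
  --fs_data_dirs=/home/yugabyte/data \
  --cdc_send_null_before_image_if_not_exists=true
```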
- -## Learn more - -* [CDC architecture](../../../architecture/docdb-replication/change-data-capture/) -* [Examples of CDC usage and patterns](https://github.com/yugabyte/cdc-examples/tree/main) {{}} -* [Tutorials to deploy in different Kafka environments](/preview/tutorials/cdc-tutorials/) {{}} -* [Data Streaming Using YugabyteDB CDC, Kafka, and SnowflakeSinkConnector](https://www.yugabyte.com/blog/data-streaming-using-yugabytedb-cdc-kafka-and-snowflakesinkconnector/) {{}} -* [Unlock Azure Storage Options With YugabyteDB CDC](https://www.yugabyte.com/blog/unlocking-azure-storage-options-with-yugabytedb-cdc/) {{}} -* [Change Data Capture From YugabyteDB to Elasticsearch](https://www.yugabyte.com/blog/change-data-capture-cdc-yugabytedb-elasticsearch/) {{}} -* [Snowflake CDC: Publishing Data Using Amazon S3 and YugabyteDB](https://www.yugabyte.com/blog/snowflake-cdc-publish-data-using-amazon-s3-yugabytedb/) {{}} -* [Streaming Changes From YugabyteDB to Downstream Databases](https://www.yugabyte.com/blog/streaming-changes-yugabytedb-cdc-downstream-databases/) {{}} -* [Change Data Capture from YugabyteDB CDC to ClickHouse](https://www.yugabyte.com/blog/change-data-capture-cdc-yugabytedb-clickhouse/) {{}} -* [How to Run Debezium Server with Kafka as a Sink](https://www.yugabyte.com/blog/change-data-capture-cdc-run-debezium-server-kafka-sink/) {{}} -* [Change Data Capture Using a Spring Data Processing Pipeline](https://www.yugabyte.com/blog/change-data-capture-cdc-spring-data-processing-pipeline/) {{}} diff --git a/docs/content/preview/additional-features/connection-manager-ysql/_index.md b/docs/content/preview/additional-features/connection-manager-ysql/_index.md deleted file mode 100644 index d8fa05cf089f..000000000000 --- a/docs/content/preview/additional-features/connection-manager-ysql/_index.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: YSQL Connection Manager -headerTitle: YSQL Connection Manager -linkTitle: YSQL Connection Manager -description: Built-in server-side connection pooler for YSQL -headcontent: Built-in server-side connection pooler for YSQL -tags: - feature: early-access -menu: - preview: - identifier: connection-manager - parent: additional-features - weight: 10 -type: indexpage ---- - -YugabyteDB includes a built-in connection pooler, YSQL Connection Manager. Because the manager is bundled with the product, it is convenient to manage, monitor, and configure the server connections without additional third-party tools. When combined with [smart drivers](../../drivers-orms/smart-drivers/), YSQL Connection Manager simplifies application architecture and enhances developer productivity. - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/admin/_index.md b/docs/content/preview/admin/_index.md deleted file mode 100644 index 8150abca462f..000000000000 --- a/docs/content/preview/admin/_index.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: CLIs and command line tools -headerTitle: Command line tools -linkTitle: CLIs -description: Use these CLIs and command line tools to interact with YugabyteDB. -headcontent: Tools for interacting with, configuring, and managing YugabyteDB -menu: - preview: - identifier: admin - parent: reference - weight: 1400 -type: indexpage ---- - -YugabyteDB ships with a variety of tools to interact with, manage, and configure your cluster. Each tool has been designed for a specific purpose. The following illustration shows which tools operate on which parts of the cluster. 
- - -![Tools and their purpose](/images/admin/tools_functionalities1.png) - -For information about [yugabyted](../reference/configuration/yugabyted/) and configuring [YB-Master](../reference/configuration/yb-master/) and [YB-TServer](../reference/configuration/yb-tserver/) services, refer to [Configuration](../reference/configuration/). - -For information about YugabyteDB API clients (YSQL shell and YCQL shell), refer to [Client shells](../api/#client-shells). - -{{}} -For all the command line tools, when passing in an argument with a value that starts with a hyphen (for example, `-1`), add a double hyphen (`--`) at the end of other arguments followed by the argument name and value. This tells the binary to treat those arguments as positional. For example, to specify `set_flag ysql_select_parallelism -1`, you need to do the following: - -```bash -yb-ts-cli [other arguments] -- set_flag ysql_select_parallelism -1 -``` - -{{}} - -## Tools - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/api/_index.md b/docs/content/preview/api/_index.md deleted file mode 100644 index e244bf2dbc95..000000000000 --- a/docs/content/preview/api/_index.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -title: YugabyteDB API reference (for YSQL and YCQL) -headerTitle: API -linkTitle: API -description: YugabyteDB API reference for PostgreSQL-compatible YSQL and Cassandra-compatible YCQL -headcontent: YugabyteDB API reference -type: indexpage -showRightNav: true ---- - - -## SQL APIs - -YugabyteDB supports two flavors of distributed SQL: - -- [YSQL](ysql/) is a fully-relational SQL API that is wire compatible with the SQL language in PostgreSQL. It is best fit for RDBMS workloads that need horizontal write scalability and global data distribution while also using relational modeling features such as JOINs, distributed transactions and referential integrity (such as foreign keys). -- [YCQL](ycql/) is a semi-relational SQL API that is best fit for internet-scale OLTP and HTAP applications needing massive data ingestion and blazing-fast queries. It supports distributed transactions, strongly consistent secondary indexes and a native JSON column type. YCQL has its roots in the Cassandra Query Language. - -Note that the APIs are isolated and independent from one another, and you need to select an API first before undertaking detailed database schema and query design and implementation. - -{{}} - - {{}} - - {{}} - -{{}} - -## Client shells - -YugabyteDB ships with command line interface (CLI) shells for interacting with each SQL API. - -{{}} - - {{}} - - {{}} - -{{}} - -## Management APIs - -YugabyteDB Anywhere and Aeon both provide APIs that can be used to deploy and manage universes, query system status, manage accounts, and more. - -{{< sections/2-boxes >}} - {{< sections/bottom-image-box - title="YugabyteDB Anywhere API" - description="Manage YugabyteDB Anywhere using the API." - buttonText="API Documentation" - buttonUrl="https://api-docs.yugabyte.com/docs/yugabyte-platform/f10502c9c9623-yugabyte-db-anywhere-api-overview" - >}} - - {{< sections/bottom-image-box - title="YugabyteDB Aeon API" - description="Manage YugabyteDB Aeon using the API." 
- buttonText="API Documentation" - buttonUrl="https://api-docs.yugabyte.com/docs/managed-apis/9u5yqnccbe8lk-yugabyte-db-aeon-rest-api" - >}} - -{{< /sections/2-boxes >}} diff --git a/docs/content/preview/api/ycql/_index.md b/docs/content/preview/api/ycql/_index.md deleted file mode 100644 index 59a1d7304ab4..000000000000 --- a/docs/content/preview/api/ycql/_index.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: YCQL API reference -headerTitle: YCQL API reference -linkTitle: YCQL -description: YCQL is a semi-relational API that is best fit for internet-scale OLTP & HTAP applications. -summary: Reference for the YCQL API -headcontent: Cassandra-compatible API -showRightNav: true -type: indexpage ---- - -## Introduction - -Yugabyte Cloud Query Language (YCQL) is a semi-relational SQL API that is best fit for internet-scale OLTP and HTAP applications needing massive data ingestion and blazing-fast queries. It supports strongly consistent secondary indexes, a native JSON column type, and distributed transactions. It has its roots in the [Cassandra Query Language (CQL)](http://cassandra.apache.org/doc/latest/cql/index.html). - -This page covers the following YCQL features. - -- Data definition language (DDL) statements. -- Data manipulation language (DML) statements. -- Builtin functions and Expression operators. -- Primitive user-defined data types. - -## DDL statements - -Data definition language (DDL) statements are instructions for the following database operations. - -- Create, alter, and drop database objects -- Create, grant, and revoke users and roles - -Statement | Description | -----------|-------------| -[`ALTER TABLE`](ddl_alter_table) | Alter a table | -[`ALTER KEYSPACE`](ddl_alter_keyspace) | Alter a keyspace | -[`CREATE INDEX`](ddl_create_index/) | Create a new index on a table | -[`CREATE KEYSPACE`](ddl_create_keyspace) | Create a new keyspace | -[`CREATE TABLE`](ddl_create_table) | Create a new table | -[`CREATE TYPE`](ddl_create_type) | Create a user-defined data type | -[`DROP INDEX`](ddl_drop_index) | Remove an index | -[`DROP KEYSPACE`](ddl_drop_keyspace) | Remove a keyspace | -[`DROP TABLE`](ddl_drop_table) | Remove a table | -[`DROP TYPE`](ddl_drop_type) | Remove a user-defined data type | -[`USE`](ddl_use) | Use an existing keyspace for subsequent commands | - -## DDL security statements - -Security statements are instructions for managing and restricting operations on the database objects. - -- Create, grant, and revoke users and roles -- Grant, and revoke permissions on database objects - -This feature is enabled by setting the YB-TServer configuration flag [`--use_cassandra_authentication`](../../reference/configuration/yb-tserver/#use-cassandra-authentication) to `true`. - -Statement | Description | -----------|-------------| -[`ALTER ROLE`](ddl_alter_role) | Alter a role | -[`CREATE ROLE`](ddl_create_role) | Create a new role | -[`DROP ROLE`](ddl_drop_role) | Remove a role | -[`GRANT PERMISSION`](ddl_grant_permission) | Grant a permission on an object to a role | -[`REVOKE PERMISSION`](ddl_revoke_permission) | Revoke a permission on an object from a role | -[`GRANT ROLE`](ddl_grant_role) | Grant a role to another role | -[`REVOKE ROLE`](ddl_revoke_role) | Revoke a role from another role | - -## DML statements - -Data manipulation language (DML) statements are used to read from and write to the existing database objects. YugabyteDB implicitly commits any updates by DML statements (similar to how Apache Cassandra behaves). 
- -Statement | Description | -----------|-------------| -[`INSERT`](dml_insert) | Insert rows into a table | -[`SELECT`](dml_select/) | Select rows from a table | -[`UPDATE`](dml_update/) | Update rows in a table | -[`DELETE`](dml_delete/) | Delete specific rows from a table | -[`TRANSACTION`](dml_transaction) | Makes changes to multiple rows in one or more tables in a transaction | -[`TRUNCATE`](dml_truncate) | Remove all rows from a table | - -## Expressions - -An expression is a finite combination of one or more values, operators, functions, and expressions that specifies a computation. Expressions can be used in the following components. - -- The select list of [`SELECT`](dml_select/) statement. For example, `SELECT id + 1 FROM sample_table;`. -- The WHERE clause in [`SELECT`](dml_select/), [`DELETE`](dml_delete/), [`INSERT`](dml_insert), or [`UPDATE`](dml_update/). -- The IF clause in [`DELETE`](dml_delete/), [`INSERT`](dml_insert), or [`UPDATE`](dml_update/). -- The VALUES clause in [`INSERT`](dml_insert). -- The SET clause in [`UPDATE`](dml_update/). - -Currently, the following expressions are supported. - -Expression | Description | ------------|-------------| -[Simple Value](expr_simple) | Column, constant, or null. Column alias cannot be used in expression yet. | -[Subscript `[]`](expr_subscript) | Subscripting columns of collection data types | -[Operator Call](expr_ocall) | Builtin operators only | -[Function Call](expr_fcall/) | Builtin function calls only | - -## Data types - -The following table lists all supported primitive types. - -Primitive Type | Allowed in Key | Type Parameters | Description | ----------------|----------------|-----------------|-------------| -[`BIGINT`](type_int) | Yes | - | 64-bit signed integer | -[`BLOB`](type_blob) | Yes | - | String of binary characters | -[`BOOLEAN`](type_bool) | Yes | - | Boolean | -[`COUNTER`](type_int) | No | - | 64-bit signed integer | -[`DECIMAL`](type_number) | Yes | - | Exact, arbitrary-precision number, no upper-bound on decimal precision | -[`DATE`](type_datetime/) | Yes | - | Date | -[`DOUBLE`](type_number) | Yes | - | 64-bit, inexact, floating-point number | -[`FLOAT`](type_number) | Yes | - | 64-bit, inexact, floating-point number | -[`FROZEN`](type_frozen) | Yes | 1 | Collection in binary format | -[`INET`](type_inet) | Yes | - | String representation of IP address | -[`INT` | `INTEGER`](type_int) | Yes | - | 32-bit signed integer | -[`LIST`](type_collection) | No | 1 | Collection of ordered elements | -[`MAP`](type_collection) | No | 2 | Collection of pairs of key-and-value elements | -[`SET`](type_collection) | No | 1 | Collection of unique elements | -[`SMALLINT`](type_int) | Yes | - | 16-bit signed integer | -[`TEXT` | `VARCHAR`](type_text) | Yes | - | String of Unicode characters | -[`TIME`](type_datetime/) | Yes | - | Time of day | -[`TIMESTAMP`](type_datetime/) | Yes | - | Date-and-time | -[`TIMEUUID`](type_uuid) | Yes | - | Timed UUID | -[`TINYINT`](type_int) | Yes | - | 8-bit signed integer | -[`UUID`](type_uuid) | Yes | - | Standard UUID | -[`VARINT`](type_int) | Yes | - | Arbitrary-precision integer | -[`JSONB`](type_jsonb) | No | - | JSON data type similar to PostgreSQL jsonb | - -[User-defined data types](ddl_create_type) are also supported. 
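As a quick illustration of how expressions and data types come together, the following is a minimal sketch; the `sample_table` table and its columns are hypothetical, chosen only to exercise a few of the primitive types listed above and an expression in the select list.

```sql
ycqlsh:example> CREATE TABLE sample_table (id INT PRIMARY KEY,
                                           name TEXT,
                                           details JSONB,
                                           created TIMESTAMP);
ycqlsh:example> INSERT INTO sample_table (id, name, details, created)
                VALUES (1, 'widget', '{ "color":"red" }', '2023-01-15 10:00:00');
ycqlsh:example> SELECT id + 1 FROM sample_table WHERE id = 1;
```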
- -## Learn more - -- [Advantages of YCQL over Cassandra](/preview/faq/comparisons/cassandra) -- [YCQL - Cassandra 3.4 compatibility](../../explore/ycql-language/cassandra-feature-support) diff --git a/docs/content/preview/api/ycql/ddl_alter_table.md b/docs/content/preview/api/ycql/ddl_alter_table.md deleted file mode 100644 index dc477557c24e..000000000000 --- a/docs/content/preview/api/ycql/ddl_alter_table.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -title: ALTER TABLE statement [YCQL] -headerTitle: ALTER TABLE -linkTitle: ALTER TABLE -description: Use the ALTER TABLE statement to change the schema or definition of an existing table. -menu: - preview_api: - parent: api-cassandra - weight: 1220 -aliases: - - /preview/api/cassandra/ddl_alter_table - - /preview/api/ycql/ddl_alter_table -type: docs ---- - -## Synopsis - -Use the `ALTER TABLE` statement to change the schema or definition of an existing table. -It allows adding, dropping, or renaming a column as well as updating a table property. - -## Syntax - -### Diagram - -ALTERTABLEtable_nameADD,column_namecolumn_typeDROP,column_nameRENAME,column_nameTOcolumn_nameWITHANDproperty_name=property_literal - -### Grammar - -```ebnf -alter_table ::= ALTER TABLE table_name alter_operator [ alter_operator ...] - -alter_operator ::= add_op | drop_op | rename_op | property_op - -add_op ::= ADD column_name column_type [ ',' column_name column_type ...] - -drop_op ::= DROP column_name [ ',' column_name ...] - -rename_op ::= RENAME column_name TO column_name [ ',' column_name TO column_name ...] - -property_op ::= WITH property_name '=' property_literal [ AND property_name '=' property_literal ...] -``` - -Where - -- `table_name`, `column_name`, and `property_name` are identifiers (`table_name` may be qualified with a keyspace name). -- `property_literal` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. - -## Semantics - -- An error is raised if `table_name` does not exist in the associated keyspace. -- Columns that are part of `PRIMARY KEY` cannot be altered. -- When adding a column, its value for all existing rows in the table defaults to `null`. -- After dropping a column, all values currently stored for that column in the table are discarded (if any). - -## Examples - -### Add a column to a table - -```sql -ycqlsh:example> CREATE TABLE employees (id INT, name TEXT, salary FLOAT, PRIMARY KEY((id), name)); -``` - -```sql -ycqlsh:example> ALTER TABLE employees ADD title TEXT; -``` - -```sql -ycqlsh:example> DESCRIBE TABLE employees; -``` - -Following result would be shown. - -```output -CREATE TABLE example.employees ( - id int, - name text, - salary float, - title text, - PRIMARY KEY (id, name) -) WITH CLUSTERING ORDER BY (name ASC); -``` - -### Remove a column from a table - -```sql -ycqlsh:example> ALTER TABLE employees DROP salary; -``` - -```sql -ycqlsh:example> DESCRIBE TABLE employees; -``` - -Following result would be shown. - -```output -CREATE TABLE example.employees ( - id int, - name text, - title text, - PRIMARY KEY (id, name) -) WITH CLUSTERING ORDER BY (name ASC); -``` - -### Rename a column in a table - -```sql -ycqlsh:example> ALTER TABLE employees RENAME title TO job_title; -``` - -```sql -ycqlsh:example> DESCRIBE TABLE employees; -``` - -Following result would be shown. 
- -```output -CREATE TABLE example.employees ( - id int, - name text, - job_title text, - PRIMARY KEY (id, name) -) WITH CLUSTERING ORDER BY (name ASC); -``` - -### Update a table property - -You can do this as follows: - -```sql -ycqlsh:example> ALTER TABLE employees WITH default_time_to_live = 5; -``` - -```sql -ycqlsh:example> DESCRIBE TABLE employees; -``` - -Following result would be shown. - -```output -CREATE TABLE example.employees ( - id int, - name text, - job_title text, - PRIMARY KEY (id, name) -) WITH CLUSTERING ORDER BY (name ASC) - AND default_time_to_live = 5; -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`DELETE`](../dml_delete/) -- [`DROP TABLE`](../ddl_drop_table) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) -- [`UPDATE`](../dml_update/) diff --git a/docs/content/preview/api/ycql/ddl_create_index.md b/docs/content/preview/api/ycql/ddl_create_index.md deleted file mode 100644 index e1152eec1a73..000000000000 --- a/docs/content/preview/api/ycql/ddl_create_index.md +++ /dev/null @@ -1,437 +0,0 @@ ---- -title: CREATE INDEX statement [YCQL] -headerTitle: CREATE INDEX -linkTitle: CREATE INDEX -summary: Create a new index on a table -description: Use the CREATE INDEX statement to create a new index on a table. -menu: - preview_api: - parent: api-cassandra - weight: 1225 -aliases: - - /preview/api/ycql/ddl_create_index -type: docs ---- - -## Synopsis - -Use the `CREATE INDEX` statement to create a new index on a table. It defines the index name, index columns, and additional columns to include. - -{{}} -In YugabyteDB, indexes are global and are implemented just like tables. They are split into tablets and distributed across the different nodes in the cluster. The sharding of indexes is based on the primary key of the index and is independent of how the main table is sharded and distributed. Indexes are not colocated with the base table. -{{}} - -## Syntax - -### Diagram - -#### create_index - -CREATEUNIQUEDEFERREDINDEXIFNOTEXISTSindex_nameONtable_name(partition_key_columns,clustering_key_columns)covering_columnsindex_propertiesWHEREindex_predicate - -#### partition_key_columns - -index_column(,index_column) - -#### clustering_key_columns - -,index_column - -#### index_properties - -WITHANDproperty_name=property_literalCLUSTERINGORDERBY(,index_columnASCDESC) - -#### index_column - -column_namejsonb_attribute - -#### jsonb_attribute - -column_name->'attribute_name'->>'attribute_name' - -#### covering_columns - -COVERINGINCLUDE(,column_name) - -#### index_predicate - -where_expression - -### Grammar - -```ebnf -create_index ::= CREATE [ UNIQUE ] [ DEFERRED ] INDEX - [ IF NOT EXISTS ] index_name ON table_name ( - partition_key_columns , [ clustering_key_columns ] ) - [ covering_columns ] [ index_properties ] - [ WHERE index_predicate ] - -partition_key_columns ::= index_column | ( index_column [ , ... ] ) - -clustering_key_columns ::= index_column [ , ... ] - -index_properties ::= WITH - { property_name = property_literal - | CLUSTERING ORDER BY ( - { index_column [ ASC | DESC ] } [ , ... ] ) } - [ AND ... ] - -index_column ::= column_name | jsonb_attribute - -jsonb_attribute ::= column_name [ -> 'attribute_name' [ ... ] ] ->> 'attribute_name' - -covering_columns ::= { COVERING | INCLUDE } ( column_name [ , ... ] ) - -index_predicate ::= where_expression -``` - -Where - -- `index_name`, `table_name`, `property_name`, and `column_name` are identifiers. -- `table_name` may be qualified with a keyspace name. 
-- `index_name` cannot be qualified with a keyspace name because an index must be created in the table's keyspace. -- `property_literal` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. -- `index_column` can be any data type except `MAP`, `SET`, `LIST`, `JSONB`, `USER_DEFINED_TYPE`. - - -## Semantics - -- An error is raised if transactions have not be enabled using the `WITH transactions = { 'enabled' : true }` clause on the table to be indexed. This is because secondary indexes internally use distributed transactions to ensure ACID guarantees in the updates to the secondary index and the associated primary key. More details [here](https://www.yugabyte.com/blog/yugabyte-db-1-1-new-feature-speeding-up-queries-with-secondary-indexes/). -- An error is raised if `index_name` already exists in the associated keyspace unless the `IF NOT EXISTS` option is used. - -{{< note title="Note" >}} - -When an index is created on an existing table, YugabyteDB will automatically backfill existing data into the index in an online manner (that is, while continuing to serve other concurrent writes and traffic). For more details on how this is done, see [Online Index Backfill](https://github.com/yugabyte/yugabyte-db/blob/master/architecture/design/online-index-backfill.md). - -{{< /note >}} - -### User-enforced consistency - -{{}} -Opt for user-enforced consistency only when there is no other solution to your problem. User-enforced consistency requires considerable user effort to keep the index and table in sync. -{{}} - -Indexes require transactions to have been enabled on the table. For cases where the table was created without enabling transactions, `consistency_level` has to be set to `user_enforced` like, - -```sql -CREATE TABLE orders (id int PRIMARY KEY, warehouse int); -CREATE INDEX ON orders (warehouse) - WITH transactions = { 'enabled' : false, 'consistency_level' : 'user_enforced' }; -``` - -{{< warning title="Syncing table and index">}} -When using an index without transactions enabled, it is the responsibility of the application to retry any insert/update/delete failures to make sure that the table and index are in sync. - -Also, if the index is created after data has been added to the table, the index may **not** be backfilled automatically depending on the setting of the `disable_index_backfill_for_non_txn_tables` flag. If set to `true`, then it is the responsibility of the user to trigger a backfill using the [yb-admin backfill_indexes_for_table](../../../admin/yb-admin/#backfill-indexes-for-table) command, which will trigger the backfill after a small delay of about a minute. This delay is controlled by the `index_backfill_upperbound_for_user_enforced_txn_duration_ms` flag. -{{< /warning >}} - -### PARTITION KEY - -- Partition key is required and defines a split of the index into _partitions_. - -### CLUSTERING KEY - -- Clustering key is optional and defines an ordering for index rows within a partition. -- Default ordering is ascending (`ASC`) but can be set for each clustering column as ascending or descending using the `CLUSTERING ORDER BY` property. -- Any primary key column of the table not indexed explicitly in `index_columns` is added as a clustering column to the index implicitly. This is necessary so that the whole primary key of the table is indexed. - -### *index_properties* - -- The `CLUSTERING ORDER BY` property can be used to set the ordering for each clustering column individually (default is `ASC`). 
-- The `TABLETS = ` property specifies the number of tablets to be used for the specified YCQL index. Setting this property overrides the value from the [`--yb_num_shards_per_tserver`](../../../reference/configuration/yb-tserver/#yb-num-shards-per-tserver) option. For an example, see [Create an index specifying the number of tablets](#create-an-index-specifying-the-number-of-tablets). -- Use the `AND` operator to use multiple index properties. -- When setting a TTL on the index using `default_time_to_live`, please ensure that the TTL value is the same as that of the table's TTL. If they are different, it would lead to the index and the table being out of sync and would lead to unexpected behavior. - -{{}} -**Caveat**: Row-level TTL cannot be set on a table with a secondary index during INSERTS/UPDATES. {{}} -{{}} - -### INCLUDED COLUMNS - -- Included columns are optional table columns whose values are copied into the index in addition to storing them in the table. When additional columns are included in the index, they can be used to respond to queries directly from the index without querying the table. - -- The following can't be added to an index's included columns: static columns of a table, expressions, and table columns with the following types: frozen, map, set, list, tuple, jsonb, and user defined. - -### UNIQUE INDEX - -- A unique index disallows duplicate values from being inserted into the indexed columns. It can be used to ensure uniqueness of index column values. - -### DEFERRED INDEX - -Currently, an "index backfill" job is launched for each index that is created. For the case where you create a table and add multiple indexes, the main table needs to be scanned multiple times to populate each index. This is unnecessary, and can also cause issues with the single touch and multi touch block cache algorithm. - -After creating a set of indexes with their backfill deferred, you can then trigger a backfill job for the entire batch of indexes (on the same table) in one of the following ways: - -- Create a new index that is not deferred: - - ```cql - CREATE DEFERRED INDEX idx_1 on table_name(col_1); // No backfill launched. - CREATE DEFERRED INDEX idx_2 on table_name(col_2); // No backfill launched. - CREATE DEFERRED INDEX idx_9 on table_name(col_9); // No backfill launched. - - - // To launch backfill ... - CREATE INDEX idx_10 on table_name(col_10); // Will launch backfill for idx_10 and - // all deferred indexes idx_1 .. idx_9 - // on the same table viz: table_name. - ``` - -- Use yb-admin to launch backfill for deferred indexes on the table. - - ```cql - CREATE DEFERRED INDEX idx_1 on table_name(col_1); // No backfill launched. - CREATE DEFERRED INDEX idx_2 on table_name(col_2); // No backfill launched. - ... - CREATE DEFERRED INDEX idx_9 on table_name(col_9); // No backfill launched. - CREATE DEFERRED INDEX idx_10 on table_name(col_10); // No backfill launched. - ``` - - Launch a backfill job for backfilling all the deferred indexes using the `backfill_indexes_for_table` command as follows: - - ```bash - bin/yb-admin --master_addresses backfill_indexes_for_table ycql.ybdemo table_name - ``` -- Use the [`--defer_index_backfill`](../../../reference/configuration/yb-master#defer-index-backfill) YB-Master flag to force all indexes to be DEFERRED, and run `yb-admin backfill_indexes_for_table` to backfill indexes. - -### PARTIAL INDEX - -- If a `WHERE` clause is specified, only rows which satisfy the `index_predicate` are indexed. 
-- An `index_predicate` can have sub-expressions on columns of these data types: `TINYINT`, `SMALLINT`, `INT/INTEGER`, `BIGINT`, `VARINT`, `BOOLEAN` and `TEXT` along with these operators (when applicable): `=, !=, >, <, >=, <=`. -- Partial indexes can be `UNIQUE`. A UNIQUE partial index enforces the constraint that for each possible tuple of indexed columns, only one row that satisfies the `index_predicate` is allowed in the table. -- `SELECT` queries can use a partial index for scanning if the `SELECT` statement's `where_expression` => (logically implies) `index_predicate`. - - {{< note title="Note" >}} - -- A partial index might not be chosen even if the implication holds in case there are better query plans. -- The logical implication holds if all sub-expressions of the `index_predicate` are present as is in the `where_expression`. For example, assume `where_expression = A AND B AND C`, `index_predicate_1 = A AND B`, `index_predicate_2 = A AND B AND D`, `index_predicate_3 = A AND B AND C AND D`. Then `where_expression` only implies `index_predicate_1` - -- Currently, valid mathematical implications are not taken into account when checking for logical implication. For example, even if `where_expression = x > 5` and `index_predicate = x > 4`, the `SELECT` query will not use the index for scanning. This is because the two sub-expressions `x > 5` and `x > 4` differ. - - {{< /note >}} - -- When using a prepared statement, the logical implication check (to decide if a partial index is usable), will only consider those sub-expressions of `where_expression` that don't have a bind variable. This is because the query plan is decided before execution (i.e., when a statement is prepared). - -```sql -ycqlsh:example> CREATE TABLE orders (customer_id INT, - order_date TIMESTAMP, - product JSONB, - warehouse_id INT, - amount DOUBLE, - PRIMARY KEY ((customer_id), order_date)) - WITH transactions = { 'enabled' : true }; - -ycqlsh:example> CREATE INDEX idx ON orders (warehouse_id) - WHERE warehouse_id < 100; - -ycqlsh:example> EXPLAIN SELECT product FROM orders - WHERE warehouse_id < 100 AND order_date >= ?; // Idx can be used -``` - -```output - QUERY PLAN ------------------------------------------- - Index Scan using temp.idx on temp.orders - Filter: (order_date >= :order_date) -``` - -```sql -ycqlsh:example> EXPLAIN SELECT product FROM orders - WHERE warehouse_id < ? and order_date >= ?; // Idx cannot be used -``` - -```output - QUERY PLAN --------------------------------------------------------------------------- - Seq Scan on temp.orders - Filter: (warehouse_id < :warehouse_id) AND (order_date >= :order_date) -``` - -- Without partial indexes, we do not allow many combinations of operators together on the same column in a `SELECT`'s where expression e.g.: `WHERE v1 != NULL and v1 = 5`. But if there was a partial index that subsumes some clauses of the `SELECT`'s where expression, two or more operators otherwise not supported together, might be supported. - -```sql -ycqlsh:example> EXPLAIN SELECT product FROM orders - WHERE warehouse_id != NULL AND warehouse_id = ?; -``` - -```output -SyntaxException: Invalid CQL Statement. 
Illogical condition for where clause -EXPLAIN SELECT product from orders where warehouse_id != NULL and warehouse_id = ?; - ^^^^^^^^^^^^ - (ql error -12) -``` - -```sql -ycqlsh:example> CREATE INDEX warehouse_idx ON orders (warehouse_id) - WHERE warehouse_id != NULL; -ycqlsh:example> EXPLAIN SELECT product FROM orders - WHERE warehouse_id != NULL AND warehouse_id = ?; // warehouse_idx can be used -``` - -```output - QUERY PLAN ----------------------------------------------------- - Index Scan using temp.warehouse_idx on temp.orders - Key Conditions: (warehouse_id = :warehouse_id) -``` - -## Examples - -### Create a table to be indexed - -'customer_id' is the partitioning column and 'order_date' is the clustering column. - -```sql -ycqlsh:example> CREATE TABLE orders (customer_id INT, - order_date TIMESTAMP, - product JSONB, - warehouse_id INT, - amount DOUBLE, - PRIMARY KEY ((customer_id), order_date)) - WITH transactions = { 'enabled' : true }; -``` - -### Create an index for query by the `order_date` column - -```sql -ycqlsh:example> CREATE INDEX orders_by_date ON orders (order_date) INCLUDE (amount); -``` - -### Create an index for query by the JSONB attribute `product->>'name'` - -```sql -ycqlsh:example> CREATE INDEX product_name - ON orders (product->>'name') INCLUDE (amount); -``` - -### Create an index for query by the `warehouse_id` column - -```sql -ycqlsh:example> CREATE INDEX orders_by_warehouse - ON orders (warehouse_id, order_date) INCLUDE (amount); -``` - -### Insert some data - -```sql -ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) - VALUES (1001, '2018-01-10', '{ "name":"desk" }', 107, 100.30); -ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) - VALUES (1002, '2018-01-11', '{ "name":"chair" }', 102, 50.45); -ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) - VALUES (1001, '2018-04-09', '{ "name":"pen" }', 102, 20.25); -ycqlsh:example> INSERT INTO orders (customer_id, order_date, product, warehouse_id, amount) - VALUES (1003, '2018-04-09', '{ "name":"pencil" }', 108, 200.80); -``` - -### Query by the partition column `customer_id` in the table - -```sql -ycqlsh:example> SELECT SUM(amount) FROM orders - WHERE customer_id = 1001 AND order_date >= '2018-01-01'; -``` - -```output - sum(amount) -------------- - 120.55 -``` - -### Query by the partition column `order_date` in the index `orders_by_date` - -```sql -ycqlsh:example> SELECT SUM(amount) FROM orders - WHERE order_date = '2018-04-09'; -``` - -```output - sum(amount) -------------- - 221.05 -``` - -### Query by the partition column `product->>'name'` in the index `product_name` - -```sql -ycqlsh:example> SELECT SUM(amount) FROM orders - WHERE product->>'name' = 'desk'; -``` - -```output - sum(amount) -------------- - 100.30 -``` - -### Query by the partition column `warehouse_id` column in the index `orders_by_warehouse` - -```sql -ycqlsh:example> SELECT SUM(amount) FROM orders - WHERE warehouse_id = 102 AND order_date >= '2018-01-01'; -``` - -```output - sum(amount) -------------- - 70.7 -``` - -### Create a table with a unique index - -You can do this as follows: - -```sql -ycqlsh:example> CREATE TABLE emp (enum INT primary key, - lastname VARCHAR, - firstname VARCHAR, - userid VARCHAR) - WITH transactions = { 'enabled' : true }; -ycqlsh:example> CREATE UNIQUE INDEX emp_by_userid ON emp (userid); -``` - -### Insert values into the table and verify no duplicate `userid` is inserted - -```sql 
-ycqlsh:example> INSERT INTO emp (enum, lastname, firstname, userid) - VALUES (1001, 'Smith', 'John', 'jsmith'); -ycqlsh:example> INSERT INTO emp (enum, lastname, firstname, userid) - VALUES (1002, 'Smith', 'Jason', 'jsmith'); -``` - -```output -InvalidRequest: Error from server: code=2200 [Invalid query] message="SQL error: Execution Error. Duplicate value disallowed by unique index emp_by_userid -INSERT INTO emp (enum, lastname, firstname, userid) - ^^^^ -VALUES (1002, 'Smith', 'Jason', 'jsmith'); - (error -300)" -``` - -```sql -ycqlsh:example> INSERT INTO emp (enum, lastname, firstname, userid) - VALUES (1002, 'Smith', 'Jason', 'jasmith'); -ycqlsh:example> SELECT * FROM emp; -``` - -```output - enum | lastname | firstname | userid -------+----------+-----------+--------- - 1002 | Smith | Jason | jasmith - 1001 | Smith | John | jsmith -``` - -### Create an index specifying the number of tablets - -You can use the `CREATE INDEX` statement with the `WITH tablets = ` clause to specify the number of tablets for an index. This is useful to scale the index up or down based on requirements. -For example, for smaller or partial indexes, it may be wasteful to have a large number of shards (tablets). In that case, you can use this to reduce the number of tablets created for the index. -Similarly, for a very large index, you can use this statement to presplit the index into a large number of shards to get improved performance. - -Note that YugabyteDB, by default, presplits an index in `yb_num_shards_per_tserver * num_of_tserver` shards. This clause can be used to override that setting on per-index basis. - -```sql -ycqlsh:example> CREATE TABLE tracking (id int PRIMARY KEY, a TEXT) WITH transactions = { 'enabled' : true }; -ycqlsh:example> CREATE INDEX my_indx ON tracking(a) WITH tablets = 10; -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`DROP INDEX`](../ddl_drop_index) diff --git a/docs/content/preview/api/ycql/ddl_create_table.md b/docs/content/preview/api/ycql/ddl_create_table.md deleted file mode 100644 index 306c1b29e564..000000000000 --- a/docs/content/preview/api/ycql/ddl_create_table.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -title: CREATE TABLE statement [YCQL] -headerTitle: CREATE TABLE -linkTitle: CREATE TABLE -description: Use the CREATE TABLE statement to create a new table in a keyspace. -menu: - preview_api: - parent: api-cassandra - weight: 1240 -aliases: - - /preview/api/ycql/ddl_create_table -type: docs ---- - -## Synopsis - -Use the `CREATE TABLE` statement to create a new table in a keyspace. It defines the table name, column names and types, primary key, and table properties. - -## Syntax - -### Diagram - -#### create_table - -CREATETABLEIFNOTEXISTStable_name(table_schema)table_properties - -#### table_schema - -,column_namecolumn_typePRIMARYKEYSTATICPRIMARYKEY((,column_name),column_name) - -#### table_properties - -WITHANDproperty_name=property_literalCLUSTERINGORDERBY(,column_nameASCDESC)COMPACTSTORAGE - -### Grammar - -```ebnf -create_table ::= CREATE TABLE [ IF NOT EXISTS ] table_name - '(' table_element [ ',' table_element ...] ')' - [WITH table_properties]; - -table_element ::= table_column | table_constraints - -table_column ::= column_name column_type [ column_constraint ...] - -column_constraint ::= PRIMARY KEY | STATIC - -table_constraints ::= PRIMARY KEY '(' partition_key_column_list clustering_key_column_list ')' - -partition_key_column_list ::= '(' column_name [ ',' column_name ...] 
')' | column_name - -clustering_key_column_list ::= [ ',' column_name ...] - -table_properties = [table_options] - [[AND] CLUSTERING ORDER BY '(' column_ordering_property [ ',' column_ordering_property ...] ')'] - [[AND] COMPACT STORAGE] - -table_options = property_name '=' property_literal [AND property_name '=' property_literal ...] - -column_ordering_property ::= column_name [ ASC | DESC ] -``` - -Where - -- `table_name`, `column_name`, and `property_name` are identifiers (`table_name` may be qualified with a keyspace name). -- `property_literal` is a literal of either [boolean](../type_bool), [text](../type_text), or [map](../type_collection) data type. - -## Semantics - -- An error is raised if `table_name` already exists in the associated keyspace unless the `IF NOT EXISTS` option is used. - -### PRIMARY KEY - -- Primary key must be defined in either `column_constraint` or `table_constraint` but not in both of them. -- Each row in a table is uniquely identified by its primary key. -- Primary key columns are either _partitioning_ columns or _clustering_ columns (described below). -- If primary key is set as a column constraint, then that column is the partition column and there are no clustering columns. -- If primary key is set as a table constraint then: - - The partition columns are given by the first entry in the primary key list: the nested column list (if given), otherwise the first column. - - The clustering columns are the rest of the columns in the primary key list (if any). -- Types `MAP`, `SET`, `LIST`, `JSONB`, `USER_DEFINED_TYPE` cannot be used in the primary key. - -#### PARTITION KEY - -- Partition key is required and defines a split of rows into _partitions_. -- Rows that share the same partition key form a partition and will be colocated on the same replica node. - -#### CLUSTERING KEY - -- Clustering key is optional and defines an ordering for rows within a partition. -- Default ordering is ascending (`ASC`) but can be set for each clustering column as ascending or descending using the `CLUSTERING ORDER BY` table property. - -### STATIC COLUMNS - -- Columns declared as `STATIC` will share the same value for all rows within a partition (that is, rows having the same partition key). -- Columns in the primary key cannot be static. -- A table without clustering columns cannot have static columns (without clustering columns the primary key and the partition key are identical so static columns would be the same as regular columns). - -### *table_properties* - -- The `CLUSTERING ORDER BY` property can be used to set the ordering for each clustering column individually (default is `ASC`). -- The `default_time_to_live` property sets the default expiration time (TTL) in seconds for a table. The expiration time can be overridden by setting TTL for individual rows. The default value is `0` and means rows do not expire. -- The `transactions` property specifies if distributed transactions are enabled in the table. To enable distributed transactions, use `transactions = { 'enabled' : true }`. -- Use the `AND` operator to use multiple table properties. -- The other YCQL table properties are allowed in the syntax but are currently ignored internally (have no effect). -- The `TABLETS = ` property specifies the number of tablets to be used for the specified YCQL table. Setting this property overrides the value from the [`--yb_num_shards_per_tserver`](../../../reference/configuration/yb-tserver/#yb-num-shards-per-tserver) option. 
For an example, see [Create a table specifying the number of tablets](#create-a-table-specifying-the-number-of-tablets). -- `COMPACT STORAGE` is only for syntax compatibility with Cassandra. It doesn't affect the underlying storage. - -## Examples - -### Use column constraint to define primary key - -'user_id' is the partitioning column and there are no clustering columns. - -```sql -ycqlsh:example> CREATE TABLE users(user_id INT PRIMARY KEY, full_name TEXT); -``` - -### Use table constraint to define primary key - -'supplier_id' and 'device_id' are the partitioning columns and 'model_year' is the clustering column. - -```sql -ycqlsh:example> CREATE TABLE devices(supplier_id INT, - device_id INT, - model_year INT, - device_name TEXT, - PRIMARY KEY((supplier_id, device_id), model_year)); -``` - -### Use column constraint to define a static column - -You can do this as follows: - -```sql -ycqlsh:example> CREATE TABLE items(supplier_id INT, - item_id INT, - supplier_name TEXT STATIC, - item_name TEXT, - PRIMARY KEY((supplier_id), item_id)); -``` - -```sql -ycqlsh:example> INSERT INTO items(supplier_id, item_id, supplier_name, item_name) - VALUES (1, 1, 'Unknown', 'Wrought Anvil'); -``` - -```sql -ycqlsh:example> INSERT INTO items(supplier_id, item_id, supplier_name, item_name) - VALUES (1, 2, 'Acme Corporation', 'Giant Rubber Band'); -``` - -```sql -ycqlsh:example> SELECT * FROM items; -``` - -```output - supplier_id | item_id | supplier_name | item_name --------------+---------+------------------+------------------- - 1 | 1 | Acme Corporation | Wrought Anvil - 1 | 2 | Acme Corporation | Giant Rubber Band -``` - -### Use table property to define the order (ascending or descending) for clustering columns - -Timestamp column 'ts' will be stored in descending order (latest values first). - -```sql -ycqlsh:example> CREATE TABLE user_actions(user_id INT, - ts TIMESTAMP, - action TEXT, - PRIMARY KEY((user_id), ts)) - WITH CLUSTERING ORDER BY (ts DESC); -``` - -```sql -ycqlsh:example> INSERT INTO user_actions(user_id, ts, action) VALUES (1, '2000-12-2 12:30:15', 'log in'); -``` - -```sql -ycqlsh:example> INSERT INTO user_actions(user_id, ts, action) VALUES (1, '2000-12-2 12:30:25', 'change password'); -``` - -```sql -ycqlsh:example> INSERT INTO user_actions(user_id, ts, action) VALUES (1, '2000-12-2 12:30:35', 'log out'); -``` - -```sql -ycqlsh:example> SELECT * FROM user_actions; -``` - -```output - user_id | ts | action ----------+---------------------------------+----------------- - 1 | 2000-12-02 19:30:35.000000+0000 | log out - 1 | 2000-12-02 19:30:25.000000+0000 | change password - 1 | 2000-12-02 19:30:15.000000+0000 | log in -``` - -### Use table property to define the default expiration time for rows - -You can do this as follows: - -```sql -ycqlsh:example> CREATE TABLE sensor_data(sensor_id INT, - ts TIMESTAMP, - value DOUBLE, - PRIMARY KEY((sensor_id), ts)) - WITH default_time_to_live = 5; -``` - -First insert at time T (row expires at T + 5). - -```sql -ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (1, '2017-10-1 11:22:31', 3.1); -``` - -Second insert 3 seconds later (row expires at T + 8). - -```sql -ycqlsh:example> INSERT INTO sensor_data(sensor_id, ts, value) VALUES (2, '2017-10-1 11:22:34', 3.4); -``` - -First select 3 seconds later (at time T + 6). 
- -```sql -ycqlsh:example> SELECT * FROM sensor_data; -``` - -```output - sensor_id | ts | value ------------+---------------------------------+------- - 2 | 2017-10-01 18:22:34.000000+0000 | 3.4 -``` - -Second select 3 seconds later (at time T + 9). - -```sql -ycqlsh:example> SELECT * FROM sensor_data; -``` - -```output - sensor_id | ts | value ------------+----+------- - -``` - -### Create a table specifying the number of tablets - -You can use the `CREATE TABLE` statement with the `WITH tablets = ` clause to specify the number of tablets for a table. This is useful to scale the table up or down based on requirements. For example, for smaller static tables, it may be wasteful to have a large number of shards (tablets). In that case, you can use this to reduce the number of tablets created for the table. Similarly, for a very large table, you can use this statement to presplit the table into a large number of shards to get improved performance. - -Note that YugabyteDB, by default, presplits a table in `yb_num_shards_per_tserver * num_of_tserver` shards. This clause can be used to override that setting on per-table basis. - -```sql -ycqlsh:example> CREATE TABLE tracking (id int PRIMARY KEY) WITH tablets = 10; -``` - -If you create an index for these tables, you can also specify the number of tablets for the index. - -You can also use `AND` to add other table properties, like in this example. - -```sql -ycqlsh:example> CREATE TABLE tracking (id int PRIMARY KEY) WITH tablets = 10 AND transactions = { 'enabled' : true }; -``` - -## See also - -- [`ALTER TABLE`](../ddl_alter_table) -- [`DELETE`](../dml_delete/) -- [`DROP TABLE`](../ddl_drop_table) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) -- [`UPDATE`](../dml_update/) diff --git a/docs/content/preview/api/ycql/ddl_create_type.md b/docs/content/preview/api/ycql/ddl_create_type.md deleted file mode 100644 index 634f76480b65..000000000000 --- a/docs/content/preview/api/ycql/ddl_create_type.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: CREATE TYPE statement [YCQL] -headerTitle: CREATE TYPE -linkTitle: CREATE TYPE -description: Use the CREATE TYPE statement to create a new user-defined data type in a keyspace. -menu: - preview_api: - parent: api-cassandra - weight: 1250 -aliases: - - /preview/api/cassandra/ddl_create_type - - /preview/api/ycql/ddl_create_type -type: docs ---- - -## Synopsis - -Use the `CREATE TYPE` statement to create a new user-defined data type in a keyspace. It defines the name of the user-defined type and the names and data types for its fields. - -## Syntax - -### Diagram - -CREATETYPEIFNOTEXISTStype_name(,field_namefield_type) - -### Grammar - -```ebnf -create_type ::= CREATE TYPE [ IF NOT EXISTS ] type_name - (field_name field_type [ ',' field_name field_type ...]); -``` - -Where - -- `type_name` and `field_name` are identifiers (`type_name` may be qualified with a keyspace name). -- `field_type` is a data type. - -## Semantics - -- An error is raised if the specified `type_name` already exists in the associated keyspace unless the `IF NOT EXISTS` option is used. -- Each `field_name` must each be unique (a type cannot have two fields of the same name). -- Each `field_type` must be either a [non-parametric type](../#data-types) or a [frozen type](../type_frozen). - -## Examples - -Collection types must be frozen to be used inside a user-defined type. 
- -```sql -ycqlsh:example> CREATE TYPE person(first_name TEXT, last_name TEXT, emails FROZEN<LIST<TEXT>>); -``` - -```sql -ycqlsh:example> DESCRIBE TYPE person; -``` - -```output -CREATE TYPE example.person ( - first_name text, - last_name text, - emails frozen<list<text>> -); -``` - -```sql -ycqlsh:example> CREATE TABLE employees(employee_id INT PRIMARY KEY, employee person); -``` - -```sql -ycqlsh:example> INSERT INTO employees(employee_id, employee) - VALUES (1, {first_name : 'John', last_name : 'Doe', emails : ['jdoe@example.com']}); -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - employee_id | employee --------------+--------------------------------------------------------------------------- - 1 | {first_name: 'John', last_name: 'Doe', emails: ['jdoe@example.com']} - -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`DROP TYPE`](../ddl_drop_type) diff --git a/docs/content/preview/api/ycql/ddl_drop_index.md b/docs/content/preview/api/ycql/ddl_drop_index.md deleted file mode 100644 index 2196fda039f5..000000000000 --- a/docs/content/preview/api/ycql/ddl_drop_index.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: DROP INDEX statement [YCQL] -headerTitle: DROP INDEX -linkTitle: DROP INDEX -description: Use the DROP INDEX statement to remove an index and all of its data from the database. -menu: - preview_api: - parent: api-cassandra - weight: 1255 -aliases: - - /preview/api/cassandra/ddl_drop_index - - /preview/api/ycql/ddl_drop_index -type: docs ---- - -## Synopsis - -Use the `DROP INDEX` statement to remove an index and all of its data from the database. - -## Syntax - -### Diagram - -DROPINDEXIFEXISTSindex_name - -### Grammar - -```ebnf -drop_index ::= DROP INDEX [ IF EXISTS ] index_name; -``` - -Where - -- `index_name` is an identifier (possibly qualified with a keyspace name). - -## Semantics - -- An error is raised if the specified `index_name` does not exist unless `IF EXISTS` option is present. -- Associated objects to `index_name` such as prepared statements will be eventually invalidated after the drop statement is completed. - -## Examples - -```sql -ycqlsh:example> CREATE TABLE users(id INT PRIMARY KEY, name TEXT) WITH transactions = { 'enabled' : true }; -``` - -```sql -ycqlsh:example> CREATE INDEX users_by_name ON users(name); -``` - -```sql -ycqlsh:example> DROP INDEX users_by_name; -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`CREATE INDEX`](../ddl_create_index/) diff --git a/docs/content/preview/api/ycql/ddl_drop_table.md b/docs/content/preview/api/ycql/ddl_drop_table.md deleted file mode 100644 index e73975002c5a..000000000000 --- a/docs/content/preview/api/ycql/ddl_drop_table.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: DROP TABLE statement [YCQL] -headerTitle: DROP TABLE -linkTitle: DROP TABLE -description: Use the DROP TABLE statement to remove a table and all of its data from the database. -menu: - preview_api: - parent: api-cassandra - weight: 1270 -aliases: - - /preview/api/cassandra/ddl_drop_table - - /preview/api/ycql/ddl_drop_table -type: docs ---- - -## Synopsis - -Use the `DROP TABLE` statement to remove a table and all of its data from the database. - -## Syntax - -### Diagram - -DROPTABLEIFEXISTStable_name - -### Grammar - -```ebnf -drop_table ::= DROP TABLE [ IF EXISTS ] table_name; -``` - -Where - -- `table_name` is an identifier (possibly qualified with a keyspace name). - -## Semantics - -- An error is raised if the specified `table_name` does not exist unless `IF EXISTS` option is present. 
-- Associated objects to `table_name` such as prepared statements will be eventually invalidated after the drop statement is completed. - -## Examples - -```sql -ycqlsh:example> CREATE TABLE users(id INT PRIMARY KEY, name TEXT); -``` - -```sql -ycqlsh:example> DROP TABLE users; -``` - -## See also - -- [`ALTER TABLE`](../ddl_alter_table) -- [`CREATE TABLE`](../ddl_create_table) -- [`DELETE`](../dml_delete/) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) -- [`UPDATE`](../dml_update/) diff --git a/docs/content/preview/api/ycql/ddl_drop_type.md b/docs/content/preview/api/ycql/ddl_drop_type.md deleted file mode 100644 index a38741ec544c..000000000000 --- a/docs/content/preview/api/ycql/ddl_drop_type.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: DROP TYPE statement [YCQL] -headerTitle: DROP TYPE -linkTitle: DROP TYPE -description: Use the DROP TYPE statement to remove an existing user-defined data type. -menu: - preview_api: - parent: api-cassandra - weight: 1280 -aliases: - - /preview/api/cassandra/ddl_drop_type - - /preview/api/ycql/ddl_drop_type -type: docs ---- - -## Synopsis - -Use the `DROP TYPE` statement to remove an existing user-defined data type. - -## Syntax - -### Diagram - -DROPTYPEIFEXISTStype_name - -### Grammar - -```ebnf -drop_type ::= DROP TYPE [ IF EXISTS ] type_name; -``` - -Where - -- `type_name` is an identifier (possibly qualified with a keyspace name). - -## Semantics - -- An error is raised if the specified `type_name` does not exist unless `IF EXISTS` option is used. -- A user-defined `type_name` cannot be dropped if it is currently used in a table or another type. - -## Examples - -```sql -ycqlsh:example> CREATE TYPE person(first_name TEXT, last_name TEXT, email TEXT); -``` - -```sql -ycqlsh:example> DROP TYPE person; -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`DROP KEYSPACE`](../ddl_drop_keyspace) diff --git a/docs/content/preview/api/ycql/dml_delete.md b/docs/content/preview/api/ycql/dml_delete.md deleted file mode 100644 index 4ad9057a75c7..000000000000 --- a/docs/content/preview/api/ycql/dml_delete.md +++ /dev/null @@ -1,294 +0,0 @@ ---- -title: DELETE statement [YCQL] -headerTitle: DELETE -linkTitle: DELETE -description: Use the DELETE statement to remove rows from a specified table that meet a given condition. -menu: - preview_api: - parent: api-cassandra - weight: 1330 -aliases: - - /preview/api/cassandra/ddl_delete - - /preview/api/ycql/ddl_delete -type: docs ---- - -## Synopsis - -Use the `DELETE` statement to remove rows from a specified table that meet a given condition. - -## Syntax - -### Diagram - -DELETEFROMtable_nameUSINGTIMESTAMPtimestamp_expressionWHEREwhere_expressionIFNOTEXISTSif_expressionRETURNS STATUS AS ROW - -### Grammar - -```ebnf -delete ::= DELETE FROM table_name - [ USING TIMESTAMP timestamp_expression ] WHERE - where_expression [ IF { [ NOT ] EXISTS | if_expression } ] - [ RETURNS STATUS AS ROW ] -``` - -Where - -- `table_name` is an identifier (possibly qualified with a keyspace name). -- Restrictions on `where_expression` and `if_expression` are covered in the Semantics section. -- See [Expressions](..#expressions) for more information on syntax rules. - -## Semantics - -- An error is raised if the specified `table_name` does not exist. -- The `where_expression` and `if_expression` must evaluate to [boolean](../type_bool) values. -- The `USING TIMESTAMP` clause indicates you would like to perform the DELETE as if it was done at the - timestamp provided by the user. 
The timestamp is the number of microseconds since epoch. -- **Note**: You should either use the `USING TIMESTAMP` clause in all of your statements or none of - them. Using a mix of statements where some have `USING TIMESTAMP` and others do not will lead to - very confusing results. -- `DELETE` is always done at `QUORUM` consistency level irrespective of setting. - -### WHERE Clause - -- The `where_expression` must specify conditions for all primary-key columns. -- The `where_expression` must not specify conditions for any regular columns. -- The `where_expression` can only apply `AND` and `=` operators. Other operators are not yet supported. - -### IF Clause - -- The `if_expression` can only apply to non-key columns (regular columns). -- The `if_expression` can contain any logical and boolean operators. -- Deleting only some column values from a row is not yet supported. -- `IF EXISTS` and `IF NOT EXISTS` options are mostly for symmetry with the [`INSERT`](../dml_insert) and [`UPDATE`](../dml_update/) commands. - - `IF EXISTS` works like a normal delete but additionally returns whether the delete was applied (a row was found with that primary key). - - `IF NOT EXISTS` is effectively a no-op since rows that do not exist cannot be deleted (but returns whether no row was found with that primary key). - -### `USING` Clause - -The `timestamp_expression` must be an integer value (or a bind variable marker for prepared statements). - -## Examples - -### Delete a row from a table - -```sql -ycqlsh:example> CREATE TABLE employees(department_id INT, - employee_id INT, - name TEXT, - PRIMARY KEY(department_id, employee_id)); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 2, 'Jane'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe'); -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 1 | 1 | John - 1 | 2 | Jane - 2 | 1 | Joe -``` - -Delete statements identify rows by the primary key columns. - -```sql -ycqlsh:example> DELETE FROM employees WHERE department_id = 1 AND employee_id = 1; -``` - -Deletes on non-existent rows are no-ops. - -```sql -ycqlsh:example> DELETE FROM employees WHERE department_id = 3 AND employee_id = 1; -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 1 | 2 | Jane - 2 | 1 | Joe -``` - -### Conditional delete using the `IF` clause - -'IF' clause conditions will return whether they were applied or not. 
- -```sql -ycqlsh:example> DELETE FROM employees WHERE department_id = 2 AND employee_id = 1 IF name = 'Joe'; -``` - -```output - [applied] ------------ - True -``` - -```sql -ycqlsh:example> DELETE FROM employees WHERE department_id = 3 AND employee_id = 1 IF EXISTS; -``` - -```output - [applied] ------------ - False -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 1 | 2 | Jane -``` - -### Delete several rows with the same partition key - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 2, 'Jack'); -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 1 | 1 | John - 1 | 2 | Jane - 2 | 1 | Joe - 2 | 2 | Jack -``` - -Delete all entries for a partition key. - -```sql -ycqlsh:example> DELETE FROM employees WHERE department_id = 1; -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 2 | 1 | Joe - 2 | 2 | Jack -``` - -Delete a range of entries within a partition key. - -```sql -ycqlsh:example> DELETE FROM employees WHERE department_id = 2 AND employee_id >= 2 AND employee_id < 4; -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 2 | 1 | Joe -``` - -### Delete with the `USING TIMESTAMP` clause - -You can do this as follows: - -```sql -ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (4, 4, 'Ted') USING TIMESTAMP 1000; -``` - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 4 | 4 | Ted - 2 | 1 | Joe - -(2 rows) -``` - -```sql -ycqlsh:foo> DELETE FROM employees USING TIMESTAMP 500 WHERE department_id = 4 AND employee_id = 4; -``` - -Not applied since timestamp is lower than 1000 - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 4 | 4 | Ted - 2 | 1 | Joe - -(2 rows) -``` - -```sql -ycqlsh:foo> DELETE FROM employees USING TIMESTAMP 1500 WHERE department_id = 4 AND employee_id = 4; -``` - -Applied since timestamp is higher than 1000. - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 2 | 1 | Joe - -(1 rows) -``` - -### RETURNS STATUS AS ROW - -When executing a batch in YCQL, the protocol returns only one error or return status. The `RETURNS STATUS AS ROW` feature addresses this limitation and adds a status row for each statement. - -See examples in [batch docs](../batch#row-status). 
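For quick reference, the statement form accepted by the grammar above looks like the following. This is a minimal sketch only, reusing the `employees` table from the preceding examples; the status columns returned for each statement (such as `[applied]`) are described in the batch docs linked above.

```sql
ycqlsh:example> DELETE FROM employees WHERE department_id = 2 AND employee_id = 1 RETURNS STATUS AS ROW;
```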
- -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) -- [`UPDATE`](../dml_update/) -- [`TRUNCATE`](../dml_truncate) -- [`Expression`](..#expressions) diff --git a/docs/content/preview/api/ycql/dml_insert.md b/docs/content/preview/api/ycql/dml_insert.md deleted file mode 100644 index aa8efabda674..000000000000 --- a/docs/content/preview/api/ycql/dml_insert.md +++ /dev/null @@ -1,275 +0,0 @@ ---- -title: INSERT statement [YCQL] -headerTitle: INSERT -linkTitle: INSERT -description: Use the INSERT statement to add a row to a specified table. -menu: - preview_api: - parent: api-cassandra - weight: 1300 -aliases: - - /preview/api/cassandra/dml_insert - - /preview/api/ycql/dml_insert -type: docs ---- - -## Synopsis - -Use the `INSERT` statement to add a row to a specified table. - -## Syntax - -### Diagram - -INSERTINTOtable_name(,column_name)VALUES(,expression)IFNOTEXISTSif_expressionUSINGusing_expressionRETURNS STATUS AS ROW - -### using_expression - -```ebnf -using_expression = ttl_or_timestamp_expression { 'AND' ttl_or_timestamp_expression }; -``` - -ANDttl_or_timestamp_expression - -### ttl_or_timestamp_expression - -```ebnf -ttl_or_timestamp_expression = 'TTL' ttl_expression | 'TIMESTAMP' timestamp_expression; -``` - -TTLttl_expressionTIMESTAMPtimestamp_expression - -### Grammar - -```ebnf -insert ::= INSERT INTO table_name ( column_name [ , ... ] ) VALUES ( - expression [ , ... ] ) - [ IF { [ NOT ] EXISTS | if_expression } ] - [ USING using_expression ] - [ RETURNS STATUS AS ROW ] -``` - -Where - -- `table_name` and `column` are identifiers (`table_name` may be qualified with a keyspace name). -- `value` can be any expression although Apache Cassandra requires that `value`s must be literals. -- Restrictions for `if_expression` and `ttl_expression` are covered in the Semantics section. -- See [Expressions](..#expressions) for more information on syntax rules. - -## Semantics - -- An error is raised if the specified `table_name` does not exist. -- The columns list must include all primary key columns. -- The `USING TIMESTAMP` clause indicates you would like to perform the INSERT as if it was done at the - timestamp provided by the user. The timestamp is the number of microseconds since epoch. -- By default `INSERT` has `upsert` semantics, that is, if the row already exists, it behaves like an `UPDATE`. If pure - `INSERT` semantics is desired then the `IF NOT EXISTS` clause can be used to make sure an existing row is not overwritten by the `INSERT`. -- **Note**: You should either use the `USING TIMESTAMP` clause in all of your statements or none of - them. Using a mix of statements where some have `USING TIMESTAMP` and others do not will lead to - very confusing results. -- Inserting rows with TTL is not supported on tables with [transactions enabled](./../ddl_create_table#table-properties-1). -- `INSERT` is always done at `QUORUM` consistency level irrespective of setting. - -### `VALUES` clause - -- The values list must have the same length as the columns list. -- Each value must be convertible to its corresponding (by position) column type. -- Each value literal can be an expression that evaluates to a simple value. - -### `IF` clause - -- The `if_expression` can only apply to non-key columns (regular columns). -- The `if_expression` can contain any logical and boolean operators. - -### `USING` clause - -- `ttl_expression` must be an integer value (or a bind variable marker for prepared statements). 
-- `timestamp_expression` must be an integer value (or a bind variable marker for prepared statements). - -## Examples - -### Insert a row into a table - -```sql -ycqlsh:example> CREATE TABLE employees(department_id INT, - employee_id INT, - name TEXT, - PRIMARY KEY(department_id, employee_id)); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 1, 'John'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 2, 'Jane'); -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 1 | 1 | John - 1 | 2 | Jane -``` - -### Conditional insert using the `IF` clause - -Example 1 - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Joe') IF name = null; -``` - -```output - [applied] ------------ - True -``` - -Example 2 - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 1, 'Jack') IF NOT EXISTS; -``` - -```output - [applied] ------------ - False -``` - -Example 3 - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 2 | 1 | Joe - 1 | 1 | John - 1 | 2 | Jane -``` - -### Insert a row with expiration time using the `USING TTL` clause - -You can do this as follows: - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name) VALUES (2, 2, 'Jack') USING TTL 10; -``` - -Now query the employees table. - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 2 | 1 | Joe - 2 | 2 | Jack - 1 | 1 | John - 1 | 2 | Jane -``` - -Again query the employees table after 11 seconds or more. - -```sql -ycqlsh:example> SELECT * FROM employees; -- 11 seconds after the insert. -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 2 | 1 | Joe - 1 | 1 | John - 1 | 2 | Jane -``` - -### Insert a row with `USING TIMESTAMP` clause - -#### Insert a row with a low timestamp - -```sql -ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 3, 'Jeff') USING TIMESTAMP 1000; -``` - -Now query the employees table. - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------ - 1 | 1 | John - 1 | 2 | Jane - 1 | 3 | Jeff - 2 | 1 | Joe - -(4 rows) -``` - -#### Overwrite the row with a higher timestamp - -```sql -ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 3, 'Jerry') USING TIMESTAMP 2000; -``` - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------- - 1 | 1 | John - 1 | 2 | Jane - 1 | 3 | Jerry - 2 | 1 | Joe - -(4 rows) -``` - -#### Try to overwrite the row with a lower timestamp - -```sql -ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name) VALUES (1, 3, 'James') USING TIMESTAMP 1500; -``` - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name ----------------+-------------+------- - 1 | 1 | John - 1 | 2 | Jane - 1 | 3 | Jerry - 2 | 1 | Joe - -(4 rows) -``` - -### RETURNS STATUS AS ROW - -When executing a batch in YCQL, the protocol returns only one error or return status. 
The `RETURNS STATUS AS ROW` feature addresses this limitation and adds a status row for each statement. - -See examples in [batch docs](../batch#row-status). - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`DELETE`](../dml_delete/) -- [`SELECT`](../dml_select/) -- [`UPDATE`](../dml_update/) -- [`Expression`](..#expressions) diff --git a/docs/content/preview/api/ycql/dml_select.md b/docs/content/preview/api/ycql/dml_select.md deleted file mode 100644 index fa92929c997c..000000000000 --- a/docs/content/preview/api/ycql/dml_select.md +++ /dev/null @@ -1,315 +0,0 @@ ---- -title: SELECT statement [YCQL] -headerTitle: SELECT -linkTitle: SELECT -description: Use the SELECT statement to retrieve (part of) rows of specified columns that meet a given condition from a table. -menu: - preview_api: - parent: api-cassandra - weight: 1310 -aliases: - - /preview/api/cassandra/dml_select - - /preview/api/ycql/dml_select -type: docs ---- - -## Synopsis - -Use the `SELECT` statement to retrieve (part of) rows of specified columns that meet a given condition from a table. It specifies the columns to be retrieved, the name of the table, and the condition each selected row must satisfy. - -## Syntax - -### Diagram - -#### select - -SELECTDISTINCT*,column_nameFROMtable_nameWHEREwhere_expressionALLOW FILTERINGIFif_expressionORDER BYorder_expressionLIMITlimit_expressionOFFSEToffset_expression - -#### order_expression - -(,column_nameASCDESC) - -### Grammar - -```ebnf -select ::= SELECT [ DISTINCT ] { * | column_name [ , column_name ... ] } - FROM table_name - [ WHERE where_expression ] - [ IF where_expression ] - [ ORDER BY order_expression ] - [ LIMIT limit_expression ] [ OFFSET offset_expression ] - -order_expression ::= ( { column_name [ ASC | DESC ] } [ , ... ] ) -``` - -Where - -- `table_name` and `column_name` are identifiers (`table_name` may be qualified with a keyspace name). -- `limit_expression` is an integer literal (or a bind variable marker for prepared statements). -- Restrictions for `where_expression` are discussed in the Semantics section. -- See [Expressions](..#expressions) for more information on syntax rules. - -## Semantics - -- An error is raised if the specified `table_name` does not exist. -- `SELECT DISTINCT` can only be used for partition columns or static columns. -- `*` means all columns of the table will be retrieved. -- `LIMIT` clause sets the maximum number of results (rows) to be returned. -- `OFFSET` clause sets the number of rows to be skipped before returning results. -- `ALLOW FILTERING` is provided for syntax compatibility with Cassandra. You can always filter on all columns. -- Reads default to `QUORUM` and read from the tablet-leader. -- To read from followers use `ONE` consistency level. -- To benefit from local reads, in addition to specifying the consistency level of `ONE`, set the `region` also in the client driver to indicate where the request is coming from, and it should match the `--placement_region` argument for the yb-tservers in that region. - -### `ORDER BY` clause - -- The `ORDER BY` clause sets the order for the returned results. -- Only clustering columns are allowed in the `order_expression`. -- For a given column, `DESC` means descending order and `ASC` or omitted means ascending order. -- Currently, only two overall orderings are allowed, the clustering order from the `CREATE TABLE` statement (forward scan) or its opposite (reverse scan). - -### `WHERE` clause - -- The `where_expression` must evaluate to boolean values. 
-- The `where_expression` can specify conditions on any columns including partition, clustering, and regular columns. -- The `where_expression` has a restricted list of operators. - - - Only `=`, `!=`, `IN` and `NOT IN` operators can be used for conditions on partition columns. - - Only operators `=`, `!=`, `<`, `<=`, `>`, `>=`, `IN` and `NOT IN` can be used for conditions on clustering and regular columns. - - Only `IN` operator can be used for conditions on tuples of clustering columns. - -### `IF` clause - -- The `if_expression` must evaluate to boolean values. -- The `if_expression` supports any combinations of all available boolean and logical operators. -- The `if_expression` can only specify conditions for non-primary-key columns although it can used on a key column of a secondary index. -- While WHERE condition is used to generate efficient query plan, the IF condition is not. ALL rows that satisfy WHERE condition will be read from the database before the IF condition is used to filter unwanted data. In the following example, although the two queries yield the same result set, SELECT with WHERE clause will use INDEX-SCAN while SELECT with IF clause will use FULL-SCAN. - -```cql -SELECT * FROM a_table WHERE key = 'my_key'; -SELECT * FROM a_table IF key = 'my_key'; -``` - -{{< note title="Note" >}} -While the where clause allows a wide range of operators, the exact conditions used in the where clause have significant performance considerations (especially for large datasets). -Some best practices are: - -- Use equality conditions on all partition columns (to fix the value of the partition key). -- Use comparison operators on the clustering columns (tighter restrictions are more valuable for left-most clustering columns). -- Generally, the closer a column is to the beginning of the primary key, the higher the performance gain for setting tighter restrictions on it. 
- -Ideally, these performance considerations should be taken into account when creating the table schema.{{< /note >}} - -## Examples - -### Select all rows from a table - -```sql -ycqlsh:example> CREATE TABLE employees(department_id INT, - employee_id INT, - dept_name TEXT STATIC, - employee_name TEXT, - PRIMARY KEY(department_id, employee_id)); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) - VALUES (1, 1, 'Accounting', 'John'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) - VALUES (1, 2, 'Accounting', 'Jane'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) - VALUES (1, 3, 'Accounting', 'John'); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, dept_name, employee_name) - VALUES (2, 1, 'Marketing', 'Joe'); -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+------------+--------------- - 1 | 1 | Accounting | John - 1 | 2 | Accounting | Jane - 1 | 3 | Accounting | John - 2 | 1 | Marketing | Joe -``` - -### Select with limit - -```sql -ycqlsh:example> SELECT * FROM employees LIMIT 2; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+------------+--------------- - 1 | 1 | Accounting | John - 1 | 2 | Accounting | Jane -``` - -### Select with offset - -```sql -ycqlsh:example> SELECT * FROM employees LIMIT 2 OFFSET 1; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+------------+--------------- - 1 | 2 | Accounting | Jane - 1 | 3 | Accounting | John -``` - -### Select distinct values - -```sql -ycqlsh:example> SELECT DISTINCT dept_name FROM employees; -``` - -```output - dept_name ------------- - Accounting - Marketing -``` - -### Select with a condition on the partitioning column - -```sql -ycqlsh:example> SELECT * FROM employees WHERE department_id = 2; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+-----------+--------------- - 2 | 1 | Marketing | Joe -``` - -### Select with condition on the clustering column - -```sql -ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 AND employee_id <= 2; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+------------+--------------- - 1 | 1 | Accounting | John - 1 | 2 | Accounting | Jane -``` - -### Select with condition on a regular column, using WHERE clause - -```sql -ycqlsh:example> SELECT * FROM employees WHERE employee_name = 'John'; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+------------+--------------- - 1 | 1 | Accounting | John - 1 | 3 | Accounting | John -``` - -### Select with condition on a regular column, using IF clause - -```sql -ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 IF employee_name != 'John'; -``` - -```output - department_id | employee_id | dept_name | employee_name ----------------+-------------+------------+--------------- - 1 | 2 | Accounting | Jane -``` - -### Select with `ORDER BY` clause - -``` sql -ycqlsh:example> CREATE TABLE sensor_data(device_id INT, - sensor_id INT, - ts TIMESTAMP, - value TEXT, - PRIMARY KEY((device_id), sensor_id, ts)) WITH CLUSTERING ORDER BY (sensor_id 
ASC, ts DESC); -``` - -```sql -ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) - VALUES (1, 1, '2018-1-1 12:30:30 UTC', 'a'); -``` - -```sql -ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) - VALUES (1, 1, '2018-1-1 12:30:31 UTC', 'b'); -``` - -```sql -ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) - VALUES (1, 2, '2018-1-1 12:30:30 UTC', 'x'); -``` - -```sql -ycqlsh:example> INSERT INTO sensor_data(device_id, sensor_id, ts, value) - VALUES (1, 2, '2018-1-1 12:30:31 UTC', 'y'); -``` - -Reverse scan, opposite of the table's clustering order. - -```sql -ycqlsh:example> SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id DESC, ts ASC; -``` - -```output - device_id | sensor_id | ts | value ------------+-----------+---------------------------------+------- - 1 | 2 | 2018-01-01 12:30:30.000000+0000 | x - 1 | 2 | 2018-01-01 12:30:31.000000+0000 | y - 1 | 1 | 2018-01-01 12:30:30.000000+0000 | a - 1 | 1 | 2018-01-01 12:30:31.000000+0000 | b -``` - -Forward scan, same as a SELECT without an ORDER BY clause. - -```sql -ycqlsh:example> SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id ASC, ts DESC; -``` - -```output - device_id | sensor_id | ts | value ------------+-----------+---------------------------------+------- - 1 | 1 | 2018-01-01 12:30:31.000000+0000 | b - 1 | 1 | 2018-01-01 12:30:30.000000+0000 | a - 1 | 2 | 2018-01-01 12:30:31.000000+0000 | y - 1 | 2 | 2018-01-01 12:30:30.000000+0000 | x -``` - -Other orderings are not allowed. - -```sql -ycqlsh:example> SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id ASC, ts ASC; -``` - -```output -InvalidRequest: Unsupported order by relation -SELECT * FROM sensor_data WHERE device_id = 1 ORDER BY sensor_id ASC, ts ASC; - ^^^^^^^^^^^^^^^^^^^^^ -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`INSERT`](../dml_insert) -- [`UPDATE`](../dml_update/) -- [`DELETE`](../dml_delete/) -- [`Expression`](..#expressions) diff --git a/docs/content/preview/api/ycql/dml_update.md b/docs/content/preview/api/ycql/dml_update.md deleted file mode 100644 index 20b023cc7b19..000000000000 --- a/docs/content/preview/api/ycql/dml_update.md +++ /dev/null @@ -1,283 +0,0 @@ ---- -title: UPDATE statement [YCQL] -headerTitle: UPDATE -linkTitle: UPDATE -description: Use the UPDATE statement to update one or more column values for a row in table. -menu: - preview_api: - parent: api-cassandra - weight: 1320 -aliases: - - /preview/api/cassandra/dml_update/ - - /preview/api/ycql/dml_update/ - - /preview/api/ysql/dml_update/ -type: docs ---- - -## Synopsis - -Use the `UPDATE` statement to update one or more column values for a row in table. - -{{< note title="Note" >}} - -YugabyteDB can only update one row at a time. Updating multiple rows is currently not supported. - -{{< /note >}} - -## Syntax - -### Diagram - -UPDATEtable_nameUSINGusing_expressionSET,assignmentWHEREwhere_expressionIFif_expressionNOTEXISTSif_expressionORNOTEXISTSRETURNS STATUS AS ROW - -### using_expression - -```ebnf -using_expression = ttl_or_timestamp_expression { 'AND' ttl_or_timestamp_expression }; -``` - -ANDttl_or_timestamp_expression - -### ttl_or_timestamp_expression - -```ebnf -ttl_or_timestamp_expression = 'TTL' ttl_expression | 'TIMESTAMP' timestamp_expression; -``` - -TTLttl_expressionTIMESTAMPtimestamp_expression - -```ebnf -update ::= UPDATE table_name [ USING using_expression ] SET assignment - [ , ... 
] WHERE where_expression - [ IF { if_expression - | [ NOT ] EXISTS - | if_expression OR [ NOT ] EXISTS } ] - [ RETURNS STATUS AS ROW ] - - -assignment ::= { column_name | column_name'['index_expression']' } '=' expression -``` - -Where - -- `table_name` is an identifier (possibly qualified with a keyspace name). -- Restrictions for `ttl_expression`, `where_expression`, and `if_expression` are covered in the Semantics section. -- See [Expressions](..#expressions) for more information on syntax rules. - -## Semantics - -- An error is raised if the specified `table_name` does not exist. -- Update statement uses _upsert semantics_, meaning it inserts the row being updated if it does not already exists. -- The `USING TIMESTAMP` clause indicates you would like to perform the UPDATE as if it was done at the - timestamp provided by the user. The timestamp is the number of microseconds since epoch. -- **Note**: You should either use the `USING TIMESTAMP` clause in all of your statements or none of - them. Using a mix of statements where some have `USING TIMESTAMP` and others do not will lead to - very confusing results. -- Updating rows `USING TTL` is not supported on tables with [transactions enabled](./../ddl_create_table#table-properties-1). -- You cannot update the columns in the primary key. As a workaround, you have to delete the row and insert a new row. -- `UPDATE` is always done at `QUORUM` consistency level irrespective of setting. - -### `WHERE` clause - -- The `where_expression` and `if_expression` must evaluate to boolean values. -- The `where_expression` must specify conditions for all primary-key columns. -- The `where_expression` must not specify conditions for any regular columns. -- The `where_expression` can only apply `AND` and `=` operators. Other operators are not yet supported. - -### `IF` clause - -- The `if_expression` can only apply to non-key columns (regular columns). -- The `if_expression` can contain any logical and boolean operators. - -### `USING` clause - -- `ttl_expression` must be an integer value (or a bind variable marker for prepared statements). -- `timestamp_expression` must be an integer value (or a bind variable marker for prepared statements). - -## Examples - -### Update a value in a table - -```sql -ycqlsh:example> CREATE TABLE employees(department_id INT, - employee_id INT, - name TEXT, - age INT, - PRIMARY KEY(department_id, employee_id)); -``` - -```sql -ycqlsh:example> INSERT INTO employees(department_id, employee_id, name, age) VALUES (1, 1, 'John', 30); -``` - -Update the value of a non primary-key column. - -```sql -ycqlsh:example> UPDATE employees SET name = 'Jack' WHERE department_id = 1 AND employee_id = 1; -``` - -Using upsert semantics to update a non-existent row (that is, insert the row). - -```sql -ycqlsh:example> UPDATE employees SET name = 'Jane', age = 40 WHERE department_id = 1 AND employee_id = 2; -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+----- - 1 | 1 | Jack | 30 - 1 | 2 | Jane | 40 -``` - -### Conditional update using the `IF` clause - -The supported expressions are allowed in the 'SET' assignment targets. - -```sql -ycqlsh:example> UPDATE employees SET age = age + 1 WHERE department_id = 1 AND employee_id = 1 IF name = 'Jack'; -``` - -```output - [applied] ------------ - True -``` - -Using upsert semantics to add a row, age is not set so will be 'null'. 
- -```sql -ycqlsh:example> UPDATE employees SET name = 'Joe' WHERE department_id = 2 AND employee_id = 1 IF NOT EXISTS; -``` - -```output - [applied] ------------ - True -``` - -```sql -ycqlsh:example> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+------ - 2 | 1 | Joe | null - 1 | 1 | Jack | 31 - 1 | 2 | Jane | 40 -``` - -### Update with expiration time using the `USING TTL` clause - -The updated values will persist for the TTL duration. - -```sql -ycqlsh:example> UPDATE employees USING TTL 10 SET age = 32 WHERE department_id = 1 AND employee_id = 1; -``` - -```sql -ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 AND employee_id = 1; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+------ - 1 | 1 | Jack | 32 -``` - -11 seconds after the update (value will have expired). - -```sql -ycqlsh:example> SELECT * FROM employees WHERE department_id = 1 AND employee_id = 1; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+------ - 1 | 1 | Jack | null -``` - -### Update row with the `USING TIMESTAMP` clause - -You can do this as follows: - -```sql -ycqlsh:foo> INSERT INTO employees(department_id, employee_id, name, age) VALUES (1, 4, 'Jeff', 20) USING TIMESTAMP 1000; -``` - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+------ - 1 | 1 | Jack | null - 1 | 2 | Jane | 40 - 1 | 4 | Jeff | 20 - 2 | 1 | Joe | null - -(4 rows) -``` - -Now update the employees table. - -```sql -ycqlsh:foo> UPDATE employees USING TIMESTAMP 500 SET age = 30 WHERE department_id = 1 AND employee_id = 4; -``` - -Not applied since timestamp is lower than 1000. - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+------ - 1 | 1 | Jack | null - 1 | 2 | Jane | 40 - 1 | 4 | Jeff | 20 - 2 | 1 | Joe | null - -(4 rows) -``` - -```sql -ycqlsh:foo> UPDATE employees USING TIMESTAMP 1500 SET age = 30 WHERE department_id = 1 AND employee_id = 4; -``` - -Applied since timestamp is higher than 1000. - -```sql -ycqlsh:foo> SELECT * FROM employees; -``` - -```output - department_id | employee_id | name | age ----------------+-------------+------+------ - 1 | 1 | Jack | null - 1 | 2 | Jane | 40 - 1 | 4 | Jeff | 30 - 2 | 1 | Joe | null - -(4 rows) -``` - -### RETURNS STATUS AS ROW - -When executing a batch in YCQL, the protocol returns only one error or return status. The `RETURNS STATUS AS ROW` feature addresses this limitation and adds a status row for each statement. - -See examples in [batch docs](../batch#row-status). - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`DELETE`](../dml_delete/) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) -- [`Expression`](..#expressions) diff --git a/docs/content/preview/api/ycql/type_bool.md b/docs/content/preview/api/ycql/type_bool.md deleted file mode 100644 index cd6da66c1972..000000000000 --- a/docs/content/preview/api/ycql/type_bool.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: BOOLEAN data type [YCQL] -headerTitle: BOOLEAN data type -linkTitle: BOOLEAN -description: Use the `BOOLEAN` data type to specify values of either "true" or "false". 
-menu: - preview_api: - parent: api-cassandra - weight: 1380 -aliases: - - /preview/api/cassandra/type_bool - - /preview/api/ycql/type_bool -type: docs ---- - -## Synopsis - -Use the `BOOLEAN` data type to specify values of either `true` or `false`. - -## Syntax - -``` -type_specification ::= BOOLEAN - -boolean_literal ::= TRUE | FALSE -``` - -## Semantics - -- Columns of type `BOOLEAN` cannot be part of the `PRIMARY KEY`. -- Columns of type `BOOLEAN` can be set, inserted, and compared. -- In `WHERE` and `IF` clause, `BOOLEAN` columns cannot be used as a standalone expression. They must be compared with either `true` or `false`. For example, `WHERE boolean_column = TRUE` is valid while `WHERE boolean_column` is not. -- Implicitly, `BOOLEAN` is neither comparable nor convertible to any other data types. - -## Examples - -```sql -ycqlsh:example> CREATE TABLE tasks (id INT PRIMARY KEY, finished BOOLEAN); -``` - -```sql -ycqlsh:example> INSERT INTO tasks (id, finished) VALUES (1, false); -``` - -```sql -ycqlsh:example> INSERT INTO tasks (id, finished) VALUES (2, false); -``` - -```sql -ycqlsh:example> UPDATE tasks SET finished = true WHERE id = 2; -``` - -```sql -ycqlsh:example> SELECT * FROM tasks; -``` - -``` -id | finished -----+---------- - 2 | True - 1 | False -``` - -## See also - -- [Data types](..#data-types) diff --git a/docs/content/preview/api/ycql/type_uuid.md b/docs/content/preview/api/ycql/type_uuid.md deleted file mode 100644 index d0afa811ff1c..000000000000 --- a/docs/content/preview/api/ycql/type_uuid.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -title: UUID and TIMEUUID data types [YCQL] -headerTitle: UUID and TIMEUUID -linkTitle: UUID and TIMEUUID -summary: UUID types -description: Use the UUID data type to specify columns for data of universally unique ids. TIMEUUID is a universal unique identifier variant that includes time information. -menu: - preview_api: - parent: api-cassandra - weight: 1460 -aliases: - - /preview/api/cassandra/type_uuid - - /preview/api/ycql/type_uuid -type: docs ---- - -## Synopsis - -Use the `UUID` data type to specify columns for data of universally unique IDs. `TIMEUUID` is a universal unique identifier variant that includes time information. - -Data type | Description | -----------|-----| -`UUID` | [UUID (all versions)](https://tools.ietf.org/html/rfc4122) | -`TIMEUUID` | [UUID (version 1)](https://tools.ietf.org/html/rfc4122#section-4.2.2) | - -## Syntax - -``` -type_specification ::= { UUID | TIMEUUID } -uuid_literal ::= 4hex_block 4hex_block '-' 4hex_block '-' 4hex_block '-' 4hex_block '-' 4hex_block 4hex_block 4hex_block -4hex_block ::= hex_digit hex_digit hex_digit hex_digit -``` - -Where - -- `hex_digit` is a hexadecimal digit (`[0-9a-fA-F]`). - -## Semantics - -- Columns of type `UUID` or `TIMEUUID` can be part of the `PRIMARY KEY`. -- Implicitly, values of type `UUID` and `TIMEUUID` data types are neither convertible nor comparable to other data types. -- `TIMEUUID`s are version 1 UUIDs: they include the date and time of their generation and a spatially unique node identifier. -- Comparison of `TIMEUUID` values first compares the time component and then (if time is equal) the node identifier. 
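The time-based comparison described above is easiest to see with a `TIMEUUID` clustering column: rows in a partition are stored in the clustering order of their `TIMEUUID` values, that is, by generation time. The following is a minimal sketch (not part of the original examples) that assumes the `now()` function described in the Date and time functions page linked below.

```sql
ycqlsh:example> CREATE TABLE events(span_id INT, created TIMEUUID, note TEXT,
                                    PRIMARY KEY((span_id), created));
ycqlsh:example> INSERT INTO events(span_id, created, note) VALUES (1, now(), 'first');
ycqlsh:example> INSERT INTO events(span_id, created, note) VALUES (1, now(), 'second');
ycqlsh:example> SELECT * FROM events WHERE span_id = 1;
```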
- -## Examples - -```sql -ycqlsh:example> CREATE TABLE devices(id UUID PRIMARY KEY, ordered_id TIMEUUID); -``` - -```sql -ycqlsh:example> INSERT INTO devices (id, ordered_id) - VALUES (123e4567-e89b-12d3-a456-426655440000, 123e4567-e89b-12d3-a456-426655440000); -``` - -```sql -ycqlsh:example> INSERT INTO devices (id, ordered_id) - VALUES (123e4567-e89b-42d3-a456-426655440000, 123e4567-e89b-12d3-a456-426655440000); -``` - -```sql -ycqlsh:example> UPDATE devices SET ordered_id = 00000000-0000-1000-0000-000000000000 - WHERE id = 123e4567-e89b-42d3-a456-426655440000; -``` - -```sql -ycqlsh:example> SELECT * FROM devices; -``` - -``` -id | ordered_id ---------------------------------------+-------------------------------------- - 123e4567-e89b-12d3-a456-426655440000 | 123e4567-e89b-12d3-a456-426655440000 - 123e4567-e89b-42d3-a456-426655440000 | 00000000-0000-1000-0000-000000000000 -``` - -## See also - -- [`Date and time Functions`](../function_datetime) -- [Data types](..#data-types) diff --git a/docs/content/preview/api/ysql/_index.md b/docs/content/preview/api/ysql/_index.md deleted file mode 100644 index 6642b3c82635..000000000000 --- a/docs/content/preview/api/ysql/_index.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: YSQL API reference -headerTitle: YSQL API reference -linkTitle: YSQL -description: Learn about Yugabyte Structured Query Language (YSQL), the distributed SQL API for the PostgreSQL compatible YugabyteDB database. -summary: Reference for the YSQL API -headcontent: PostgreSQL-compatible API -showRightNav: true -type: indexpage ---- - -## Introduction - -Yugabyte Structured Query Language (YSQL) is an ANSI SQL, fully-relational API that is best fit for scale-out RDBMS applications that need ultra resilience, massive write scalability, and geographic data distribution. The YugabyteDB SQL processing layer is built by using the [PostgreSQL](https://www.yugabyte.com/postgresql/) code (version 15) directly. The result of this approach is that [YSQL is fully compatible with PostgreSQL _by construction_](https://www.yugabyte.com/postgresql/postgresql-compatibility/). - -YSQL therefore supports all of the traditional relational modeling features, such as referential integrity (implemented using a foreign key constraint from a child table to a primary key to its parent table), joins, partial indexes, triggers, and stored procedures. It extends the familiar transactional notions into the YugabyteDB Distributed SQL Database architecture. - -If you don't find what you're looking for in the YSQL documentation, you might find answers in the relevant [PostgreSQL documentation](https://www.postgresql.org/docs/15/index.html). Successive YugabyteDB releases honor PostgreSQL syntax and semantics, although some features (for example those that are specific to the PostgreSQL monolithic SQL database architecture) might not be supported for distributed SQL. The YSQL documentation specifies the supported syntax and extensions. - -To find the version of the PostgreSQL processing layer used in YugabyteDB, you can use the `version()` function. The following YSQL query displays only the first part of the returned value: - -```plpgsql -select rpad(version(), 18)||'...' as v; -``` - -```output - v ------------------------ - PostgreSQL 15.2-YB... 
-``` - -## YSQL components - -The main components of YSQL include: - -- Data definition language (DDL) -- Data manipulation language (DML) -- Data control language (DCL) -- Built-in SQL functions -- PL/pgSQL procedural language for stored procedures - -These components depend on underlying features like the data type system (common for both SQL and PL/pgSQL), expressions, database objects with qualified names, and comments. Other components support purposes such as system control, transaction control, and performance tuning. - -### The SQL language - -The section [The SQL language](./the-sql-language/) describes of all of the YugabyteDB SQL statements. Each statement has its own dedicated page. Each page starts with a formal specification of the syntax: both as a _railroad diagram_; and as a _grammar_ using the PostgreSQL convention. Then it explains the semantics and illustrates the explanation with code examples. - -### Supporting language elements - -This section lists the main elements that support the YugabyteDB SQL language subsystem. - -- [Built-in SQL functions](exprs/). -- [Data types](datatypes/). Most PostgreSQL-compatible data types are supported. -- [Keywords](keywords/). -- Names and Qualifiers. Some names are reserved for the system. List of [reserved names](reserved_names/). - -## Quick Start - -You can explore the basics of the YSQL API using the [Quick Start](/preview/quick-start/macos/). - -It always helps to have access to a sandbox YugabyteDB cluster where you can, when you need to, do whatever you want without considering any risk of doing harm. Here are the kinds of things you'll want to do: - -- Connect as the _postgres_ role and create and drop other _superusers_, and regular roles. -- Create and drop databases -- Create and drop extensions -- Create and drop objects of all other kinds - -With these freedoms, you'll be able to set up any regime that you need to help you illustrate, or test, a hypothesis about how things work. - -Moreover, for some experiments, you'll need operating system access so that you can make changes to various configuration files (like the one that determines the default values for session parameters). - -It also helps to have a vanilla PostgreSQL installation on the same server so that you can confirm for yourself that the SQL systems of each (at least for the functionality that application developers use, and in the overwhelming majority of cases) are syntactically and semantically identical. - -To do all this confidently, you need to be sure that nobody else can use your sandbox so that you know that everything that you observe will be explained by what you deliberately did. Occasionally, you'll even want to destroy a cluster at one version and replace it with a cluster at a different version. - -The simplest way to achieve this ideal sandbox regime is to use your own laptop. The [Quick Start](/preview/quick-start/macos/) shows you how to do this. diff --git a/docs/content/preview/api/ysql/datatypes/_index.md b/docs/content/preview/api/ysql/datatypes/_index.md deleted file mode 100644 index 5d7a132d9cc6..000000000000 --- a/docs/content/preview/api/ysql/datatypes/_index.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -title: Data types [YSQL] -headerTitle: Data types -linkTitle: Data types -description: Data types -summary: YSQL data type overview and specification. 
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: api-ysql-datatypes - parent: ysql-language-elements - weight: 80 -type: indexpage ---- - -The following table lists the primitive and compound data types in YSQL. - -| Data type | Alias | Description | -|-----------|-------|-------------| -| [array](type_array/) | | One-dimensional or multidimensional rectilinear array of any data type payload | -| [bigint](type_numeric) | [int8](type_numeric) | Signed eight-byte integer | -| [bigserial](type_serial) | [serial8](type_serial) | Autoincrementing eight-byte integer | -| `bit [ (n) ]` 1 | | Fixed-length bit string | -| `bit varying [ (n) ]` 1 | `varbit [ (n) ]` | Variable-length bit string | -| [boolean](type_bool) | [bool](type_bool) | Logical boolean (true/false) | -| `box` 1 | | Rectangular box | -| [bytea](type_binary) | | Binary data | -| [character [ (n) ]](type_character) | [char [ (n) ]](type_character) | Fixed-length character string | -| [character varying [ (n) ]](type_character) | [varchar [ (n) ]](type_character) | Variable-length character string | -| `cidr` 1 | | IPv4 or IPv6 network address | -| `circle` 1 | | Circle on a plane | -| [date](type_datetime/) | | Calendar date (year, month, day) | -| [double precision](type_numeric) | [float8](type_numeric) | Double precision floating-point number (8 bytes) | -| `inet` 1 | | IPv4 or IPv6 host address | -| [integer](type_numeric) | [int, int4](type_numeric) | Signed four-byte integer | -| [interval [ fields ] [ (p) ]](type_datetime/) | | Time span | -| [json](type_json/) 1 | | Textual JSON data | -| [jsonb](type_json/) 1 | | JSON data, stored as decomposed binary | -| `line` 1 | | Infinite line on a plane | -| `lseg` 1 | | Line segment on a plane | -| `macaddr` 1 | | Media Access Control (MAC) address | -| `macaddr8` 1 | | Media Access Control (MAC) address (EUI-64 format) | -| [money](type_money) | | Currency amount | -| [numeric [ (p, s) ]](type_numeric) | [decimal [ (p, s) ]](type_numeric) | Exact fixed-point numeric | -| `path` 1 | | Geometric path on a plane | -| `pg_lsn` 1 | | Log Sequence Number | -| `point` 1 | | Geometric point | -| `polygon` 1 | | Closed geometric path | -| [real](type_numeric) | [float4](type_numeric) | Floating-point number (4 bytes) | -| [smallint](type_numeric) | [int2](type_numeric) | Signed two-byte integer | -| [int4range](type_range#synopsis) | | `integer` range | -| [int8range](type_range#synopsis) | | `bigint` range | -| [numrange](type_range#synopsis) | | `numeric` range | -| [tsrange](type_range#synopsis) | | `timestamp without time zone` range | -| [tstzrange](type_range#synopsis) | | `timestamp with time zone` range | -| [daterange](type_range#synopsis) | | `date` range | -| [smallserial](type_serial) | [serial2](type_serial) | Autoincrementing two-byte integer | -| [serial](type_serial) | [serial4](type_serial) | Autoincrementing four-byte integer | -| [text](type_character) | | Variable-length character string | -| [time [ (p) ] [ without time zone ]](type_datetime/) | | Time of day (no time zone) | -| [time [ (p) ] with time zone](type_datetime/) | [timetz](type_datetime/) | Time of day, including time zone | -| [timestamp [ (p) ] [ without time zone ]](type_datetime/) | | Date and time (no time zone) | -| [timestamp [ (p) ] with time zone](type_datetime/) | [timestamptz](type_datetime/) | Date and time, including time zone | -| `tsquery` 1 | | Text search query | -| `tsvector` 1 | | Text search document | -| `txid_snapshot` 1 | | Transaction ID snapshot | 
-| [uuid](type_uuid) | | Universally unique identifier | -| `xml` 2 | | XML data | - -1 Table columns of this type cannot be part of an `INDEX` `KEY`. - -2 Under development. diff --git a/docs/content/preview/api/ysql/datatypes/type_array/_index.md b/docs/content/preview/api/ysql/datatypes/type_array/_index.md deleted file mode 100644 index 778b5e66793a..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_array/_index.md +++ /dev/null @@ -1,373 +0,0 @@ ---- -title: YSQL array -linkTitle: Array -headerTitle: Array data types and functionality -description: YSQL lets you construct an array data type, of any dimensionality, of any built-in or user-defined data type. You can use this constructed data type for a table column and for a variable or formal parameter in a PL/pgSQL procedure. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: api-ysql-datatypes-array - parent: api-ysql-datatypes -aliases: - - /preview/api/ysql/datatypes/type_array -type: indexpage -showRightNav: true ---- - -## Synopsis - -A multidimensional array lets you store a large composite value in a single field (row-column intersection) in a table; and it lets you assign such a value to a PL/pgSQL variable, or pass it via a procedure's, or a function's, formal parameter. - -You can see from the declarations below that every value in an array is non-negotiably of the same data type—either a primitive data type like `text` or `numeric`, or a user-defined scalar or composite data type (like a _"row"_ type). - -An array is, by definition, a rectilinear N-dimensional set of "cells". You can picture a one-dimensional array as a line of cells, a two-dimensional array as a rectangle of cells, and a three-dimensional array as a cuboid of cells. The terms "line", "rectangle", and "cuboid" are the only specific ones. The generic term "N-dimensional array" includes these and all others. The meaning of "rectilinear" is sometimes captured by saying that the shape has no ragged edges or surfaces. If you try to create an array value that is not rectilinear, then you get an error whose detail says _"Multidimensional arrays must have sub-arrays with matching dimensions"_. The number of dimensions that an array has is called its _dimensionality_. - -{{< note title="Ragged arrays" >}} -Sometimes, a ragged structure is useful. Here's an example: -- a one-dimensional array of "payload" one-dimensional arrays, each of which might have a different length - -This structure is crucially different from a rectilinear two-dimensional array. A `DOMAIN` lets you create such a structure by providing the means to give the payload array data type a name. [Using an array of `DOMAIN` values](./array-of-domains/) shows how to do this. -{{< /note >}} - -A value within an array is specified by a tuple of _index_ values, like this (for a four-dimensional array): -``` -arr[13][7][5][17] -``` -The index is the cell number along the dimension in question. The index values along each dimension are consecutive—in other words, you cannot delete a cell within a array. This reflects the fact that an array is rectilinear. However, a value in a cell can, of course, be `NULL`. - -The leftmost value (`13` in the example) is the index along the first dimension; the rightmost value (`17` in this example) is the index along the Nth dimension—that is, the fourth dimension in this example. The value of the index of the first cell along a particular dimension is known as the _lower bound_ for that dimension. 
If you take no special steps when you create an array value, then the lower bound of each dimension is `1`. But, if you find it useful, you can specify any positive or negative integer, or zero, as the lower bound of the specified dimension. The lower bounds of an array are fixed at creation time, and so is its dimensionality. - -Correspondingly, each dimension has an upper bound. This, too, is fixed at array creation time. The index values along each dimension are consecutive. The fact that each dimension has a single value for its upper and lower bound reflects the fact that an array is rectilinear. - -If you read a within-array value with a tuple of index values that put it outside of the array bounds, then you silently get `NULL`. But if you attempt to set such an out-of-bounds value, then, because this is an implicit attempt to change the array's bounds, you get the _"array subscript out of range"_ error. - -Notice that you can create an array, using a single assignment, as a so-called "slice" of an array, by specifying desired lower and upper index values along each axis of the source array. The new array cannot have a different dimensionality than its source. You should specify the lower and upper index values for the slice, along each dimension of the source array, to lie within (or, maximally, coincide with) the bounds of that dimension. If you specify the slice with a lower bound less than the corresponding lower bound of the source array, then the new lower bound is silently interpreted as the extant corresponding source lower bound. The same is true for the upper bounds. The syntax of this method means that the lower bounds of the new array inevitably all start at `1`. Here is an example (in PL/pgSQL syntax) using a two-dimensional source array: - -``` -new_arr := source_arr[3:4][7:9]; -``` -**Note:** A one-dimensional array is a special case because, uniquely among N-dimensional shapes, it is tautologically rectilinear. You can increase the length of such an array implicitly, by setting a value in a cell that has a lower index value than the present lower bound or a higher index value than the present upper bound. Once you've done this, there is no way to reduce the length because there is no explicit operation for this and no "unset" operation for a specified cell. You can, however, create a slice so that the new array has the source array's original size. - -The following properties determine the shape of an array. Each can be observed using the listed dedicated function. The first formal parameter (with data type `anyarray`) is the array of interest . When appropriate, there's a second formal parameter (with data type `int`) that specifies the dimension of interest. The return is an `int` value, except in one case where it's a `text` value, as detailed below. - -- [`array_ndims()`](functions-operators/properties/#array-ndims) returns the dimensionality of the specified array. - -- [`array_lower()`](functions-operators/properties/#array-lower) returns the lower bound of the specified array on the specified dimension. - -- [`array_upper()`](functions-operators/properties/#array-upper) returns the upper bound of the specified array on the specified dimension. - -- [`array_length()`](functions-operators/properties/#array-length) returns the length of the specified array on the specified dimension. 
The length, the upper bound, and the lower bound, for a particular dimension, are mutually related, thus: -``` -  "length" = "upper bound" - "lower bound" + 1 -``` - -- [`cardinality()`](functions-operators/properties/#cardinality) returns the total number of cells (and therefore values) in the specified array. The cardinality and length along each dimension are mutually related, thus: -``` -  "cardinality" = "length 1" * "length 2" * ... * "length N" -``` - -- [`array_dims()`](functions-operators/properties/#array-dims) returns a text representation of the same information as `array_lower()` and `array_length()` return, for all dimensions in a single `text` value, showing the upper and lower bounds like this: `[3:4][7:9][2:5]` for a three-dimensional array. Use this for human consumption. Use `array_lower()` and `array_length()` for programmatic consumption. - -Arrays are special because (unlike the case for, for example, numeric data types like `decimal` and `int`, or character data types like `text` and `varchar`) there are no ready-made array data types. Rather, you construct the array data type that you need using an array _type constructor_. Here's an example: - -```plpgsql -create table t1(k int primary key, arr text array[4]); -``` -This syntax conforms to the SQL Standard. Notice that `array` is a reserved word. (You cannot, for example, create a table with that name.) It appears to let you specify just a one-dimensional array and to specify how many values it holds. But both of these apparent declarations of intent are ignored and act, therefore, only as potentially misleading documentation. - -The following illustrates the PostgreSQL extension to the Standard that YSQL, therefore, inherits: - -```plpgsql -create table t2( - k int primary key, - one_dimensional_array int[], - two_dimensional_array int[10][10]); -``` -Notice that it appears, optionally, to let you specify how many values each dimension holds. (The Standard syntax allows the specification of the length of just one dimension.) However, these apparent declarations of intent, too, are silently ignored. Moreover, even the _dimensionality_ is ignored. The value, in a particular row, in a table column with an array data type (or its cousin, a variable in a PL/pgSQL program) can hold an array value of _any_ dimensionality. This is demonstrated by example in [Multidimensional array of `int` values](./literals/array-of-primitive-values/#multidimensional-array-of-int-values). This means that declaring an array using the reserved word `array`, which apparently lets you define only a one-dimensional array, and declaring an array using `[]`, which apparently lets you define an array of any dimensionality, where one, some, or all of the dimensions are nominally constrained, are entirely equivalent. - -The possibility that different rows in the same table column can hold array values of different dimensionality is explained by picturing the implementation. Array values are held, in an opaque internal representation, as a linear "ribbon" of suitably delimited values of the array's data type. The array's actual dimensionality, and the upper and lower bound of the index along each dimension, is suitably represented in a header. This information is used, in a trivial arithmetic formula, to translate an address specification like `arr[13][7][5][17]` into the position of the value, as a single integer, along the ribbon of values. 
Understanding this explains why, except for the special case of a one-dimensional array, the dimensionality and the bounds of an array value are fixed at creation time. It also explains why a few of the array functions are supported only for one-dimensional arrays. - -Yugabyte recommends that, for uniformity, you choose to declare arrays only with this syntax: - -``` -create table t2( - k int primary key, - one_dimensional_array int[], - two_dimensional_array int[]); -``` - -The `array_ndims()` function lets you define a table constraint to insist that the array dimensionality is fixed for every row in a table column with such a data type. The `array_length()` function lets you insist that each dimension of a multidimensional array has a specified length for every row, or that its length doesn't exceed a specified limit for any row. - -## Atomically null vs having all values null - -Here is a minimal example: -```plpgsql -create table t(k int primary key, v int[]); -insert into t(k) values(1); -insert into t(k, v) values (2, '{null}'::int[]); -\pset null '<is null>' -select k, v, array_dims(v) as dims from t order by k; -``` -It shows this: - -``` - k | v | dims ----+-----------+----------- - 1 | <is null> | <is null> - 2 | {NULL} | [1:1] -``` - -Because _"v"_ has no constraint, it can be `NULL`, just like when its data type is scalar. This is the case for the row with _"k = 1"_. Here, _"v"_ is said to be _atomically null_. (This term is usually used only when the data type is composite, to distinguish the outcome from what is seen for the row with _"k = 2"_, where _"v"_ is not atomically null.) The array properties of the first row's _"v"_, like its dimensionality, are all `NULL`. But for the second row, they have meaningful, `not null`, values. Now try this: -```plpgsql -update t set v = v||'{null}'::int[] where k = 2; -select k, v, array_dims(v) as dims from t where k = 2; -``` -The `||` operator is explained in [Array concatenation functions and operators](./functions-operators/concatenation/#the-160-160-160-160-operator). The query shows this: - -``` - k | v | dims ----+-------------+------- - 2 | {NULL,NULL} | [1:2] -``` -Here, _"v"_ for the second row, while not atomically null, has all of its values `NULL`. Its dimensionality cannot be changed, but because it is a one-dimensional array, its length can be extended, as was explained above. This is allowed: -```plpgsql -update t set v[0] = 17 where k = 2; -select k, v, array_dims(v) as dims from t where k = 2; -``` -It shows this: -``` - k | v | dims ----+---------------------------+------- - 2 | [0:3]={17,NULL,NULL,NULL} | [0:3] -``` - This, too, is allowed: -```plpgsql -update t set v[1] = 42 where k = 1; -select k, v, array_dims(v) as dims from t where k = 1; -``` -It shows this: -``` - k | v | dims ----+------+------- - 1 | {42} | [1:1] -``` - -The dimensionality of _"v"_ for this first row has now been irrevocably established. - - -## Type construction - -Arrays are not the only example of type construction. So, also, are _"row"_ types and `DOMAIN`s: - -```plpgsql -create type rec_t as(f1 int, f2 text); - -create domain medal_t as text -check( - length(value) <= 6 and - value in ('gold', 'silver', 'bronze') -); - -create table t3(k int primary key, rec rec_t, medal medal_t); -``` - -Notice that you must define a _"row"_ type or a `DOMAIN` as a schema object. But you define the data type of an array "in place" when you create a table or write PL/pgSQL code, as was illustrated above. To put this another way, you _cannot_ name a constructed array type. 
Rather, you can use it only "on the fly" to define the data type of a column, a PL/pgSQL variable, or a PL/pgSQL formal parameter. The consequence of this is that while you _can_ define, for example, the data type of a named field in a _"row"_ type as an array of a specified data type, you _cannot_ define an array of a specified array data type. (If you try to write such a declaration, you'll see, as you type it, that you have no way to express what you're trying to say.) - -## Informal sketch of array functionality - -The sections within this "Array data types and functionality" major section carefully describe what is sketched here. - -_First_, create a table with an `int[]` column and populate it with a two-dimensional array by using an array literal. -```plpgsql -create table t( - k int primary key, v int[]); - -insert into t(k, v) values(1, - '{ - {11, 12, 13}, - {21, 22, 23} - } - '::int[]); -``` -_Next_, look at a direct `::text` typecast of the value that was inserted: - -```plpgsql -select v::text from t where k = 1; -``` -It shows this: -``` - v -------------------------- - {{11,12,13},{21,22,23}} -``` -Notice that, apart from the fact that it has no whitespace, this representation is identical to the literal that defined the inserted array. It can therefore itself be used as the text of a literal for the same array value. - -_Next_ check that the inserted array value has the expected properties: -```plpgsql -select - array_ndims(v), - array_length(v, 1), - array_length(v, 2), - array_dims(v) -from t where k = 1; -``` -It shows this: -``` - array_ndims | array_length | array_length | array_dims --------------+--------------+--------------+------------ - 2 | 2 | 3 | [1:2][1:3] -``` - -The `array_ndims()` function reports the dimensionality of the array; `array_length()` reports the length of the specified dimension (that is, the number of values that this dimension has); and `array_dims()` presents the same information, as a single `text` value, as using `array_length()` in turn for each dimension does. Notice that `array_length()` returns a _single_ `int` value for the specified dimension. Its design rests upon a rule, exemplified by saying that a two-dimensional array must be a rectangle (it cannot have a ragged edge). In the same way, a three-dimensional array must be a cuboid (it cannot have an uneven surface). This notion, though it's harder to visualise, continues to apply as the number of dimensions increases. - -Here's an example that violates the rule: -```plpgsql -insert into t(k, v) values(2, - '{ - {11, 12, 13}, - {21, 22, 23, 24} - } - '::int[]); -``` - -The formatting emphasizes that its edge is ragged. It causes a _"22P02: malformed array literal"_ error whose detail says _"Multidimensional arrays must have sub-arrays with matching dimensions"_. - -Finally, in this sketch, this `DO` block shows how you can visualise the values in a two-dimensional array as a rectangular grid. 
- -```plpgsql -do $body$ -declare - arr constant int[] not null:= '{ - {11, 12, 13, 14}, - {21, 22, 23, 24}, - {31, 32, 33, 34} - }'::int[]; - - ndims constant int not null := array_ndims(arr); - line text; -begin - if array_ndims(arr) <> 2 then - raise exception 'This code handles only a two-dimensional array.'; - end if; - - declare - len1 constant int not null := array_length(arr, 1); - len2 constant int not null := array_length(arr, 2); - begin - for row in 1..len1 loop - line := ' '; - for col in 1..len2 loop - line := line||lpad(arr[row][col]::text, 5); - end loop; - raise info '%', line; - end loop; - end; -end; -$body$; -``` -It produces this result (after manually stripping the _"INFO:"_ prompts): -``` - 11 12 13 14 - 21 22 23 24 - 31 32 33 34 -``` -This approach isn't practical for an array with higher dimensionality or for a two-dimensional array whose second dimension is large. Rather, this code is included here to show how you can address individual elements. The names of the implicitly declared `FOR` loop variables _"row"_ and _"col"_ correspond intuitively to how the values are laid out in the literal that defines the array value. The nested loops are designed to visit the values in so-called row-major order (the last subscript varies most rapidly). - -The term _"row-major order"_ is explained in [Joint semantics](./functions-operators/properties/#joint-semantics) within the section _"Functions for reporting the geometric properties of an array"_. - -When, for example, the values of same-dimensioned multidimensional arrays are compared, they are visited in this order and compared pairwise in just the same way that scalar values are compared. - -**Note:** [Joint semantics](./functions-operators/properties/#joint-semantics), in the _"Functions for reporting the geometric properties of an array"_ section, also contains an example PL/pgSQL procedure that shows how to traverse an arbitrary two-dimensional array's values, where the lower bounds and lengths along each dimension are unknown beforehand, in this order. - -Notice that, in the example above, the first value in each dimension has index value 1. This is the case when an array value is created using a literal and you say nothing about the index values. The next example shows how you can control where the index values for each dimension start and end. -```plpgsql -\pset null '<is null>' -with v as ( - select '[2:4][5:8]= - { - {25, 26, 27, 28}, - {35, 36, 37, 38}, - {45, 46, 47, 48} - }'::int[] as arr) -select - arr[0][0] as "[0][0]", - arr[2][5] as "[2][5]", - arr[2][8] as "[2][8]", - arr[4][5] as "[4][5]", - arr[4][8] as "[4][8]", - arr[9][9] as "[9][9]" -from v; -``` -In this syntax, `[2:4]` says that the index runs from 2 through 4 on the first dimension; and `[5:8]` says that it runs from 5 through 8 on the second dimension. The values have been chosen to illustrate this. Of course, you must provide the right number of values for each dimension. The query produces this result: -``` - [0][0] | [2][5] | [2][8] | [4][5] | [4][8] | [9][9] ------------+--------+--------+--------+--------+----------- - <is null> | 25 | 28 | 45 | 48 | <is null> -``` -Notice that if you access an element whose index values put it outside the ranges of the defined values, then, as mentioned, you silently get `NULL`. - -The values in an array are stored by laying out their internal representations consecutively in row-major order. 
This term is explained in [Joint semantics](./functions-operators/properties/#joint-semantics) within the _"Functions for reporting the geometric properties of an array"_ section. Because every value has the same data type, this means that a value of interest can be addressed quickly, without index support, by calculating its offset. The value itself knows its dimensions. This explains how arrays of different dimensionality can be stored in a single table column. Even when the representations are of variable length (as is the case with, for example, `text` values), each knows its length so that the value boundaries can be calculated. - -## Uses of arrays - -You can use a one-dimensional array to store a graph, like temperature readings as a function of time. But the time axis is implicit: it's defined by each successive value's index. The application decides how to translate the integral index value to a time value. - -You can use a two-dimensional array to store a surface. For example, you could decide to interpret the first index as an increment in latitude, and the second index as an increment in longitude. You might, then, use the array values to represent, say, the average temperature, over some period, at a location measured at points on a rectangular grid. - -A trained machine learning model is likely to be either a single array with maybe five or six dimensions and with fixed size, or a collection of such arrays. It's useful, for various practical reasons, to store several such models, corresponding to different stages of training or to different detailed use areas. The large physics applications at the Lawrence Livermore National Laboratory represent, and store, observations as multi-dimensional arrays. - -In these uses, your requirement is to persist the data and then to retrieve it (possibly retrieving just a slice) for programmatic analysis of the kind for which SQL is at best cumbersome or at worst inappropriate. For example, a one-dimensional array might be used to represent a path on a horizontal surface, where the value is a row representing the _(x, y)_ coordinate pair, and you might want to fit a curve through the data points to smooth out measurement inaccuracies. The [GPS trip data](./#example-use-case-gps-trip-data) use case, described below, typifies this use of arrays. - -Some use cases call for a multidimensional _ragged_ array-like structure. Such a structure doesn't qualify for the name "array" because it isn't rectilinear. The note above points to [Using an array of `DOMAIN` values](./array-of-domains/) which shows how to implement such a ragged structure. - -## Example use case: GPS trip data - -Amateur cyclists like to record their trips using a GPS device and then to upload the recorded data to one of no end of Internet sites, dedicated to that purpose, so that they can review their trips, and those of others, whenever they want, into the indefinite future. Such a site might use a SQL database to store all these trips. - -The GPS device lets the cyclist split the trip into successive intervals, usually called laps, so that they can later focus their review attention on particular laps of interest like, for example, a notorious steep hill. So each trip has one or many laps. A lap is typically no more than about 100 km—and often more like 5-10 km. But it could be as large as, say, 300 km. The resolution of modern devices is typically just a few paces under good conditions—say 3m. 
So a lap could have as many as 100,000 GPS data points, each of which records the timestamp, position, and no end of other associated instantaneous values of facts like, for example, heart rate. - -This sounds like a classic three table design, with foreign key constraints to capture the notion that a GPS data point belongs to a lap and that a lap belongs to a trip. The array data type allows all of the GPS data points that belong to a lap to be recorded in a single row in the _"laps"_ table—in other words as a multivalued field, thus: - -```plpgsql -create type gps_data_point_t as ( - ts timestamp, - lat numeric, - long numeric, - alt numeric, - cadence int, - heart_rate int - ... - ); - -create table laps( - lap_start_ts timestamp, - trip_start_ts timestamp, - userid uuid, - gps_data_points gps_data_point_t[], - - constraint laps_pk primary key (lap_start_ts, trip_start_ts, userid), - - constraint laps_fk foreign key (trip_start_ts, userid) - references trips(trip_start_ts, userid) - match full on delete cascade on update restrict); -``` -**Note:** In PostgreSQL, the maximum number of values that an array of any dimensionality can hold is `(2^27 - 1)` (about 137 million). If you exceed this limit, then you get a clear _"54000: array size exceeds the maximum allowed (134217727)"_ error. This maps to the PL/pgSQL exception _"program_limit_exceeded"_. In PostgreSQL, array values are stored out of line. However, in the YugabyteDB YSQL subsystem, they are stored in line, just like, for example, a `json` or `jsonb` value. As a consequence, the maximum number of values that a YSQL array can accommodate is smaller than the PostgreSQL limit. Moreover, the actual YSQL limit depends on circumstances—and when it's exceeded you get a "time out" error. Experiment shows that the limit is about 30 million values. You can test this for yourself using [`array_fill()`](./functions-operators/array-fill/)) function. - -With about 100,000 GPS data points, a 300 km trip is easily accommodated. - -The design that stores the GPS points in an array certainly breaks one of the time-honored rules of relational design: that column data types should be scalars. It does, however, bring definite advantages without the correctness risk and loss of functionality that it might in other use cases. - -For example, in the classic _"orders"_ and _"order_lines"_ design, an order line is for a quantity of a particular item from the vendor's catalog. And order lines for many different users will doubtless refer to the same catalog item. The catalog item has lots of fields; and some of them (especially the price) sometimes must be updated. Moreover, the overall business context implies queries like this: _find the total number of a specified catalog item that was ordered, by any user, during a specified time period_. Clearly a fully normal Codd-and-Date design is called for here. - -It's different with GPS data. The resolution of modern devices is so fine (typically just a few paces, as mentioned) that it's hugely unlikely that two different GPS data points would have the same position. It's even less likely that different point would share the same heart rate and all the other facts that are recorded at each position. In other words it's inconceivable that a query like the example given for the *"orders"* use case (_find the trips, by any user, that all share a common GPS data point_) would be useful. Moreover, all typical uses require fetching a trip and all its GPS data in a single query. 
One obvious example is to plot the transit of a lap on a map. Another example is to compute the generous containing envelope for a lap so that the set of coinciding lap envelopes can be discovered and analyzed to generate leader board reports and the like. SQL is not up to this kind of computation. Rather, you need procedural code—either in a stored procedure or in a client-side program. - -The is use case is taken one step, by using a ragged array-like structure, in [Example use case: GPS trip data (revisited)](./array-of-domains/#example-use-case-gps-trip-data-revisited). - -## Organization of the remaining array functionality content - -The following sections explain the details about array data types and functionality: - -- [The `array[]` value constructor](./array-constructor/) -- [Creating an array value using a literal](./literals/) -- [Built-in SQL functions and operators for arrays](./functions-operators/) -- [Using an array of `DOMAIN` values](./array-of-domains) diff --git a/docs/content/preview/api/ysql/datatypes/type_array/functions-operators/_index.md b/docs/content/preview/api/ysql/datatypes/type_array/functions-operators/_index.md deleted file mode 100644 index 9197f6573aec..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_array/functions-operators/_index.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -title: Array functions and operators -linkTitle: Functions and operators -headerTitle: Array functions and operators -description: Array functions and operators -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: array-functions-operators - parent: api-ysql-datatypes-array - weight: 90 -type: indexpage -showRightNav: true ---- - -**Note:** For an alphabetical listing of the array functions and operators, see the listing in the navigation bar. - -Most of the functions and operators listed here can use an array of any dimensionality, but four of the functions accept, or produce, only a one-dimensional array. This property is called out by the second column _"1-d only?"_ in the tables that follow. The restricted status is indicated by _"1-d"_ in that function's row. When the field is blank, there is no dimensionality restriction. - -## Functions for creating arrays from scratch - -The `array[]` constructor, and the three functions, create an array from scratch. - -| Function or operator | 1-d only? | Description | -| ---- | ---- | ---- | -| [`array[]`](./../array-constructor/) | | The array[] value constructor is a special variadic function that creates an array value from scratch using an expression for each of the array's values. Such an expression can itself use the `array[]` constructor or an [array literal](../literals/). | -| [`array_fill()`](./array-fill/) | | Returns a new "blank canvas" array of the specified shape with all cells set to the same specified value. | -| [`array_agg()`](./array-agg-unnest/#array-agg) | | Returns an array (of an implied _"row"_ type) from a SQL subquery. | -| [`string_to_array()`](./string-to-array/) | 1-d | Returns a one-dimensional `text[]` array by splitting the input `text` value into subvalues using the specified `text` value as the delimiter. Optionally, allows a specified `text` value to be interpreted as `NULL`. | - -## Functions for reporting the geometric properties of an array - -| Function | 1-d only? | Description | -| ---- | ---- | ---- | -| [`array_ndims()`](./properties/#array-ndims) | | Returns the dimensionality of the specified array. 
| -| [`array_lower()`](./properties/#array-lower) | | Returns the lower bound of the specified array along the specified dimension. | -| [`array_upper()`](./properties/#array-upper) | | Returns the upper bound of the specified array along the specified dimension. | -| [`array_length()`](./properties/#array-length) | | Returns the length of the specified array along the specified dimension. | -| [`cardinality()`](./properties/#cardinality) | | Returns the total number of values in the specified array. | -| [`array_dims()`](./properties/#array-dims) | | Returns a text representation of the same information as `array_lower()` and `array_length()`, for all dimensions, in a single text value. | - -## Functions to find a value in an array - -| Function | 1-d only? | Description | -| ---- | ---- | ---- | -| [`array_position()`](./array-position/#array-position) | 1-d | Returns the index, in the supplied array, of the specified value. Optionally starts searching at the specified index. | -| [`array_positions()`](./array-position/#array-positions) | 1-d | Returns the indexes, in the supplied array, of all occurrences the specified value. | - -## Operators to test whether a value is in an array - -These operators require that the [LHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) is a scalar and that -the [RHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) is an array of that LHS's data type. - -| Operator | 1-d only? | Description | -| ---- | ---- | ---- | -| [`ANY`](./any-all/) | | Returns `TRUE` if _at least one_ of the specified inequality tests between the LHS element and each of the RHS array's elements evaluates to `TRUE`. | -| [`ALL`](./any-all/) | | Returns `TRUE` if _every one_ of the specified inequality tests between the LHS element and each of the RHS array's elements evaluates to `TRUE`. | - -## Operators for comparing two arrays - -These operators require that the [LHS and RHS](https://en.wikipedia.org/wiki/Sides_of_an_equation) arrays have the same data type. - -| Operator | 1-d only? | Description | -| ---- | ---- | ---- | -| [`=`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS and RHS arrays are equal. | -| [`<>`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS and RHS arrays are not equal. | -| [`>`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is greater than the RHS array. | -| [`>=`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is greater than or equal to the RHS array. | -| [`<=`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is less than or equal to the RHS array. | -| [`<`](./comparison/#the-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-and-160-160-160-160-operators) | | Returns `TRUE` if the LHS array is less than the RHS array. | -| [`@>`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators-1) | | Returns `TRUE` if the LHS array contains the RHS array—that is, if every distinct value in the RHS array is found among the LHS array's distinct values. 
| -| [`<@`](./comparison/#the-160-160-160-160-and-160-160-160-160-operators-1) | | Returns `TRUE` if the LHS array is contained by the RHS array—that is, if every distinct value in the LHS array is found among the RHS array's distinct values. | -| [`&&`](./comparison/#the-160-160-160-160-operator) | | Returns `TRUE` if the LHS and RHS arrays overlap—that is, if they have at least one value in common. | - - -## The slice operator - -| Operator | 1-d only? | Description | -| ---- | ---- | ---- | -|[`[lb1:ub1]...[lbN:ubN]`](./slice-operator/) | | Returns a new array whose length is defined by specifying the slice's lower and upper bound along each dimension. These specified slicing bounds must not exceed the source array's bounds. The new array has the same dimensionality as the source array and its lower bound is `1` on each axis. | - -## Functions and operators for concatenating an array with an array or an element - -These functions require that the two arrays have the same data type and compatible dimensionality. - -| Function or operator | 1-d only? | Description | -| ---- | ---- | ---- | -| [`||`](./concatenation/#the-160-160-160-160-operator) | | Returns the concatenation of any number of compatible `anyarray` and `anyelement` values. | -| [`array_cat()`](./concatenation/#array-cat) | | Returns the concatenation of two compatible `anyarray` values. | -| [`array_append()`](./concatenation/#array-append) | | Returns an array that results from appending a scalar value to (that is, _after_) an array value. | -| [`array_prepend()`](./concatenation/#array-prepend) | | Returns an array that results from prepending a scalar value to (that is, _before_) an array value. | - -## Functions and operators to change values in an array - -| Function or operator | 1-d only? | Description | -| ---- | ---- | ---- | -| [`array_replace()`](./replace-a-value/#array-replace) | | Returns a new array where every occurrence of the specified value in the input array has been replaced by the specified new value. | -| [`arr[idx_1]...[idx_N] := val`](./replace-a-value/#setting-an-array-value-explicitly-and-in-place) | | Update a value in an array "in place". | -| [`array_remove()`](./array-remove) | 1-d | Returns a new array where _every_ occurrence of the specified value has been removed from the specified input array. | - -## Function to convert an array to a text value - -| Function | 1-d only? | Description | -| ---- | ---- | ---- | -| [`array_to_string()`](./array-to-string) | | Returns a `text` value computed by representing each array value, traversing these in row-major order, by its `::text` typecast, using the supplied delimiter between each such representation. (The result, therefore, loses all information about the arrays geometric properties.) Optionally, represent `NULL` by the supplied `text` value. | - -## Table function to transform an array into a SETOF anyelement - -| Function | 1-d only? | Description | -| ---- | ---- | ---- | -| [`unnest()`](./array-agg-unnest/#unnest) | | Use in the `FROM` clause of a `SELECT` statement. The simple overload accepts a single `anyarray` value and returns a `SETOF anyelement`. The exotic overload accepts a variadic list of `anyarray` values and returns a `SETOF` with many columns where each, in turn, has the output of the corresponding simple overload. | - -## Table function to transform an array into a SETOF index values - -| Function | 1-d only? 
| Description | -| ---- | ---- | ---- | -| [`generate_subscripts()`](./array-agg-unnest/#generate-subscripts) | | Use in the `FROM` clause of a `SELECT` statement. Returns the values of the indexes along the specified dimension of the specified array. | diff --git a/docs/content/preview/api/ysql/datatypes/type_array/literals/_index.md b/docs/content/preview/api/ysql/datatypes/type_array/literals/_index.md deleted file mode 100644 index a6a2e4dec5fc..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_array/literals/_index.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: Creating an array value using a literal -linkTitle: Literals -headerTitle: Creating an array value using a literal -description: Creating an array value using a literal -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: array-literals - parent: api-ysql-datatypes-array - weight: 20 -type: indexpage ---- - -This section introduces array literals informally with a few examples. Its subsections, listed below, explain formally how you construct syntactically correct array literals that establish the values that you intend. - -An array literal starts with a left curly brace. This is followed by some number of comma-separated literal representations for the array's values. Sometimes, the value representations need not be double-quoted—but _may_ be. And sometimes the value representations must be double-quoted. The array literal then ends with a right curly brace. Depending on the array's data type, its values might be scalar, or they might be composite. For example, they might be _"row"_ type values; or they might be arrays. The literal for a multidimensional array is written as an array of arrays of arrays... and so on. They might even be values of a user-defined `DOMAIN` which is based on an array data type. This powerful notion is discussed in the dedicated section [Using an array of `DOMAIN` values](../array-of-domains/). - -To use such a literal in SQL or in PL/pgSQL it must be enquoted in the same way as is an ordinary `text` literal. You can enquote an array literal using dollar quotes, if this suits your purpose, just as you can for a `text` literal. You sometimes need to follow the closing quote with a suitable typecast operator for the array data type that you intend. And sometimes the context of use uniquely determines the literal's data type. It's never wrong to write the typecast explicitly—and it's a good practice always to do this. - -Here, in use in a SQL `SELECT` statement, is the literal for a one-dimensional array of primitive `int` values: - -```plpgsql -\t on -select '{1, 2, 3}'::int[]; -``` - -The `\t on` meta-command suppresses column headers and the rule-off under these. Unless the headers are important for understanding, query output from `ysqlsh` will be shown, throughout the present "arrays" major section, without these. - -This is the output that the first example produces: - -```output - {1,2,3} -``` - -The second example surrounds the values that the array literal defines with double quotes: - -```plpgsql -select '{"1", "2", "3"}'::int[]; -``` - -It produces the identical output to the first example, where no double quotes were used. 
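-
-As an aside, the double quotes start to matter as soon as a value itself contains a character that the array literal syntax treats specially, such as a comma or a leading or trailing space. The following is a minimal sketch of this (it uses a `text[]` array and isn't one of the numbered examples in this section):
-
-```plpgsql
-select '{"a,b", c}'::text[] as "with quotes", '{a,b,c}'::text[] as "without quotes";
-```
-
-The first literal produces a two-value array whose first value is `a,b`; the second produces a three-value array of the values `a`, `b`, and `c`.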
- -The third example defines a two-dimensional array of `int` values: - -```plpgsql -select ' - { - {11, 12, 13}, - {21, 22, 23} - } - '::int[]; -``` - -It produces this result: - -```output - {{11,12,13},{21,22,23}} -``` - -The fourth example defines an array whose values are instances of a _"row"_ type: - -```plpgsql -create type rt as (f1 int, f2 text); - -select ' - { - "(1,a1 a2)", - "(2,b1 b2)", - "(3,c1 v2)" - } -'::rt[]; -``` - -It produces this output: - -```output - {"(1,\"a1 a2\")","(2,\"b1 b2\")","(3,\"c1 v2\")"} -``` - -All whitespace (except, of course, within the text values) has been removed. The double quotes around the representation of each _"row"_ type value are retained. This suggests that they are significant. (Test this by removing them. It causes the _"22P02: malformed row literal"_ error.) Most noticeably, there are clearly rules at work in connection with the representation of each `text` value within the representation of each _"row"_ type value. - -The following sections present the rules carefully and, when the rules allow some freedom, give recommendations. - -[The text typecast of a value, the literal for that value, and how they are related](./text-typecasting-and-literals/) establishes the important notions that allow you to distinguish between a _literal_ and the _text of the literal_. It's the _text_ of an array literal that, by following specific grammar rules for this class of literal, actually defines the intended value. The literal, as a whole, enquotes this bare text and typecasts it to the desired target array data type. - -[The literal for an array of primitive values](./array-of-primitive-values/) gives the rules for array literals whose values are scalars (for example, are of primitive data types). - -[The literal for a _"row"_ type value](./row/) gives the rules for the literal for a value of a _"row"_ type. These rules are essential to the understanding of the next section. - -[The literal for an array of _"row"_ type values](./array-of-rows/) gives the rules for array literals whose values are composite (that is, a _"row"_ type). - -These rules are covered in the following sections of the PostgreSQL documentation: - -- [8.15. Arrays](https://www.postgresql.org/docs/15/arrays.html) - -- [8.16. Composite Types](https://www.postgresql.org/docs/15/rowtypes.html) diff --git a/docs/content/preview/api/ysql/datatypes/type_bool.md b/docs/content/preview/api/ysql/datatypes/type_bool.md deleted file mode 100644 index a409d2ee3eff..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_bool.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: BOOLEAN data types -headerTitle: BOOLEAN data types -linkTitle: Boolean -description: Use the BOOLEAN data type to represent three different states - TRUE, FALSE, or NULL. -menu: - preview_api: - identifier: api-ysql-datatypes-bool - parent: api-ysql-datatypes -aliases: - - /preview/api/ysql/datatypes/type_bool -type: docs ---- - -## Synopsis - -The `BOOLEAN` data type represents three different states: `TRUE`, `FALSE`, or `NULL`. 
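-
-The following sketch shows a few of the accepted literal spellings (listed under Description below) in action; each expression is typecast to `boolean`:
-
-```plpgsql
-select 'yes'::boolean as "from 'yes'", 'off'::boolean as "from 'off'", 1::int::boolean as "from 1";
-```
-
-The first and third columns show _true_ and the second shows _false_.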
- -## Description - -```ebnf -type_specification ::= { BOOLEAN | BOOL } -literal ::= { TRUE | true | 't' | 'y' | 'yes' | 'on' | 1 | - FALSE | false | 'f' | 'n' | 'no' | 'off' | 0 } -``` diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/_index.md deleted file mode 100644 index 6a7eb7871f77..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/_index.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: Date and time data types and functionality [YSQL] -headerTitle: Date and time data types and functionality -linkTitle: Date and time -description: Learn about YSQL support for the date, time, timestamp, and interval data types and their functions and operators. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: api-ysql-datatypes-datetime - parent: api-ysql-datatypes -type: indexpage -showRightNav: true ---- -## Synopsis - -YSQL supports the following data types for values that represent a date, a time of day, a date-and-time-of-day pair, or a duration. These data types will be referred to jointly as the _date-time_ data types. - -| Data type | Purpose | Internal format | Min | Max | Resolution | -| -------------------------------------------------------------------------------------------------- | --------------------------------- | ----------------------- | -------- | ---------- | ------------- | -| [date](./date-time-data-types-semantics/type-date/) | date moment (wall-clock) | 4-bytes | 4713 BC | 5874897 AD | 1 day | -| [time](./date-time-data-types-semantics/type-time/) [(p)] | time moment (wall-clock) | 8-bytes | 00:00:00 | 24:00:00 | 1 microsecond | -| [timetz](#avoid-timetz) [(p)] | _[avoid this](#avoid-timetz)_ | | | | | -| [timestamp](./date-time-data-types-semantics/type-timestamp/#the-plain-timestamp-data-type) [(p)] | date-and-time moment (wall-clock) | 12-bytes | 4713 BC | 294276 AD | 1 microsecond | -| [timestamptz](./date-time-data-types-semantics/type-timestamp/#the-timestamptz-data-type) [(p)] | date-and-time moment (absolute) | 12-bytes | 4713 BC | 294276 AD | 1 microsecond | -| [interval](./date-time-data-types-semantics/type-interval/) [fields] [(p)] | duration between two moments | 16-bytes 3-field struct | | | 1 microsecond | - -The optional _(p)_ qualifier, where _p_ is a literal integer value in _0..6_, specifies the precision, in microseconds, with which values will be recorded. (It has no effect on the size of the internal representation.) The optional _fields_ qualifier, valid only in an _interval_ declaration, is explained in the [_interval_ data type](./date-time-data-types-semantics/type-interval/) section. - -The spelling _timestamptz_ is an alias, defined by PostgreSQL and inherited by YSQL, for what the SQL Standard spells as _timestamp with time zone_. The unadorned spelling, _timestamp_, is defined by the SQL Standard and may, optionally, be spelled as _timestamp without time zone_. A corresponding account applies to _timetz_ and _time_. - -Because of their brevity, the forms (plain) _time_, _timetz_, (plain) _timestamp_, and _timestamptz_ are used throughout this _"Date and time data types"_ main section rather than the verbose forms that spell the names using _without time zone_ and _with time zone_. - -A value of the _interval_ data type represents a _duration_. In contrast, a value of one of the other five data types each represents a _point in time_ (a.k.a. a _moment_). 
- -Subtraction between a pair of moment values with the same data type produces, with one exception, an _interval_ value. Exceptionally, subtracting one _date_ value from another produces an _integer_ value. - -{{< tip title="Avoid using the 'timetz' data type." >}} -The [PostgreSQL documentation](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-DATETIME-TABLE) recommends against using the _timetz_ (a.k.a. _time with time zone_) data type. This text is slightly reworded: - -> The data type _time with time zone_ is defined by the SQL standard, but the definition exhibits properties which lead to questionable usefulness. In most cases, a combination of _date_, (plain) _time_, (plain) _timestamp_, and _timestamptz_ should provide the complete range of _date-time_ functionality that any application could require. - -The thinking is that a notion that expresses only what a clock might read in a particular timezone gives only part of the picture. For example, when a clock reads 20:00 in _UTC_, it reads 04:00 in China Standard Time. But 20:00 _UTC_ is the evening of one day and 04:00 is in the small hours of the morning of the _next day_ in China Standard Time. (Neither _UTC_ nor China Standard Time adjusts its clocks for Daylight Savings.) The data type _timestamptz_ represents both the time of day and the date and so it handles the present use case naturally. No further reference will be made to _timetz_. -{{< /tip >}} -
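-
-As a quick illustration of the _date_ exception noted above, this minimal sketch shows both the data type and the value that _date_ subtraction produces:
-
-```plpgsql
-select
-  pg_typeof('2021-02-17'::date - '2021-01-13'::date) as "data type",
-  ('2021-02-17'::date - '2021-01-13'::date)          as "value";
-```
-
-The result is the _integer_ value _35_ (the number of days between the two dates).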
-{{< note title="Maximum and minimum supported values." >}} -You can discover that you can define an earlier _timestamp[tz]_ value than _4713-01-01 00:00:00 BC_, or a later one than _294276-01-01 00:00:00_, without error. Try this: - -```plpgsql --- The domain "ts_t" is a convenient single point of maintenance to allow --- choosing between "plain timestamp" and "timestamptz" for the test. -drop domain if exists ts_t cascade; -create domain ts_t as timestamptz; - -drop function if exists ts_limits() cascade; -create function ts_limits() - returns table(z text) - language plpgsql -as $body$ -declare - one_sec constant interval not null := make_interval(secs=>1); - - max_ts constant ts_t not null := '294276-12-31 23:59:59 UTC AD'; - min_ts constant ts_t not null := '4714-11-24 00:00:00 UTC BC'; - t ts_t not null := max_ts; -begin - z := 'max_ts: '||max_ts::text; return next; - begin - t := max_ts + one_sec; - exception when datetime_field_overflow - -- 22008: timestamp out of range - then - z := 'max_ts overflowed'; return next; - end; - - z := ''; return next; - - z := 'min_ts: '||min_ts::text; return next; - begin - t := min_ts - one_sec; - exception when datetime_field_overflow - -- 22008: timestamp out of range - then - z := 'min_ts underflowed'; return next; - end; -end; -$body$; - -set timezone = 'UTC'; -select z from ts_limits(); -``` - -This is the result: - -```output - max_ts: 294276-12-31 23:59:59+00 - max_ts overflowed - - min_ts: 4714-11-24 00:00:00+00 BC - min_ts underflowed -``` - -You see the same date and time-of-day values, but without the timezone offset of course, if you define the _ts_t_ domain type using plain _timestamp_. - -This test is shown for completeness. Its outcome is of little practical consequence. You can rely on the values in the "Min" and "Max" columns in the table above, copied from the [PostgreSQL documentation](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-DATETIME-TABLE), to specify the _supported_ range. Yugabyte recommends that, as a practical compromise, you take these to be the limits for _timestamp[tz]_ values: - -```output -['4713-01-01 00:00:00 BC', '294276-12-31 23:59:59 AD'] -``` - -Notice that the minimum and maximum _interval_ values are not specified in the table above. You need to understand how an _interval_ value is represented internally as a three-field _[mm, dd, ss]_ tuple to appreciate that the limits must be expressed individually in terms of these fields. The section [_interval_ value limits](./date-time-data-types-semantics/type-interval/interval-limits/) explains all this. -{{< /note >}} - -Modern applications almost always are designed for global deployment. This means that they must accommodate timezones—and that it will be the norm therefore to use the _timestamptz_ data type and not _date_, plain _time_, or plain _timestamp_. Application code will therefore need to be aware of, and to set, the timezone. It's not uncommon to expose the ability to set the timezone to the user so that _date-time_ moments can be shown differently according to the user's present purpose. 
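-
-The following sketch shows the kind of effect that such timezone-aware code brings about: one and the same _timestamptz_ moment is rendered differently according to the session's _TimeZone_ setting:
-
-```plpgsql
-set timezone = 'America/Los_Angeles';
-select '2021-06-15 20:00:00 UTC'::timestamptz as "shown in Los Angeles";
-
-set timezone = 'Asia/Kolkata';
-select '2021-06-15 20:00:00 UTC'::timestamptz as "shown in Kolkata";
-```
-
-The first query shows _2021-06-15 13:00:00-07_ and the second shows _2021-06-16 01:30:00+05:30_: the same moment, displayed according to two different local conventions.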
- -## Special date-time manifest constants - -PostgreSQL, and therefore YSQL, support the use of several special manifest _text_ constants when they are typecast to specified _date-time_ data types, thus: - -| constant | valid with | -| ----------- | ---------------------------------- | -| 'epoch' | date, plain timestamp | -| 'infinity' | date, plain timestamp, timestamptz | -| '-infinity' | date, plain timestamp | -| 'now' | date, plain time, plain timestamp | -| 'today' | date, plain timestamp | -| 'tomorrow' | date, plain timestamp | -| 'yesterday' | date, plain timestamp | -| 'allballs' | plain time | - -Their meanings are given in section [8.5.1.4. Special Values](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-DATETIME-SPECIAL-VALUES) in the PostgreSQL documentation. - -{{< tip title="Avoid using all of these special constants except for 'infinity' and '-infinity'." >}} - -The implementation of the function [random_test_report_for_modeled_age()](./functions/miscellaneous/age/#function-random-test-report-for-modeled-age) shows a common locution where _'infinity'_ and _'-infinity'_ are used to initialize maximum and minimum values for _timestamp_ values that are updated as new _timestamp_ values arise during a loop's execution. - -The constants _'infinity'_ and _'-infinity'_ can be also used to define _range_ values that are unbounded at one end. But this effect can be achieved with more clarity simply by omitting the value at the end of the range that you want to be unbounded. - -The remaining special constants have different kinds of non-obvious results. See the recommendation [Don't use the special manifest constant 'now'](./functions/current-date-time-moment/#avoid-constant-now) on the 'Functions that return the current date-time moment' page. The constants _'today'_, _'tomorrow'_. and _'yesterday'_ all bring analogous risks to those brought by _'now'_. And the intended effects of _'epoch'_ and _'allballs'_ are brought with optimal clarity for the reader by typecasting an appropriately spelled literal value to the required data type, whatever it might be. - -Yugabyte recommends that you avoid using all of the special manifest _text_ _date-time_ constants except for _'infinity'_ and _'-infinity'_. -{{< /tip >}} - -{{< note title="Even 'infinity' and '-infinity' can't be used everywhere that you might expect." >}} -Try this test: - -```plpgsql -select 'infinity'::timestamptz - clock_timestamp(); -``` - -It causes the _22008_ error, _cannot subtract infinite timestamps_. Normally, the difference between two _timestamptz_ values is an _interval_ value. So you might think that the result here would be an infinite interval. But there is no such thing. This attempt: - -```plpgsql -select 'infinity'::interval; -``` - -causes the _22007_ error, _invalid input syntax for type interval: "infinity"_. -{{< /note >}} - -## How to use the date-time data types major section - -Many users of all kinds of SQL databases have reported that they find everything about the _date-time_ story complex and confusing. This explains why this overall section is rather big and why the hierarchy of pages and child pages is both wide and deep. The order presented in the left-hand navigation menu was designed so that the pages can be read just like the sections and subsections in a book. The overall pedagogy was designed with this reading order in mind. It is highly recommended, therefore, that you (at least once) read the whole story from start to finish in this order. 
- -If you have to maintain extant application code, you'll probably need to understand everything that this overall section explains. This is likely to be especially the case when the legacy code is old and has, therefore, been migrated from PostgreSQL to YugabyteDB. - -However, if your purpose is only to write brand-new application code, and if you're happy simply to accept Yugabyte's various recommendations without studying the reasoning that supports these, then you'll need to read only a small part of this overall major section. This is what you need: - -- **[Conceptual background](./conceptual-background/)** -- **[Real timezones that observe Daylight Savings Time](./timezones/extended-timezone-names/canonical-real-country-with-dst/)** -- **[Real timezones that don't observe Daylight Savings Time](./timezones/extended-timezone-names/canonical-real-country-no-dst/)** -- **[The plain timestamp and timestamptz data types](./date-time-data-types-semantics/type-timestamp/)** -- **[Sensitivity of converting between timestamptz and plain timestamp to the UTC offset](./timezones/timezone-sensitive-operations/timestamptz-plain-timestamp-conversion/)** -- **[Sensitivity of timestamptz-interval arithmetic to the current timezone](./timezones/timezone-sensitive-operations/timestamptz-interval-day-arithmetic/)** -- **[Recommended practice for specifying the UTC offset](./timezones/recommendation/)** -- **[Custom domain types for specializing the native interval functionality](./date-time-data-types-semantics/type-interval/custom-interval-domains/)** diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/_index.md deleted file mode 100644 index c4dded2efd67..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/_index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Semantics of the date-time data types [YSQL] -headerTitle: The semantics of the date-time data types -linkTitle: Semantics of the date-time data types -description: The semantics of the date, time, timestamp, timestamptz, and interval data types. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: date-time-data-types-semantics - parent: api-ysql-datatypes-datetime - weight: 60 -type: indexpage ---- - -The following subsections define the semantics of the _date-time_ data types: -- [The _date_ data type](./type-date/) -- [The _time_ data type](./type-time/) -- [The plain _timestamp_ and _timestamptz_ data types](./type-timestamp/) -- [The _interval_ data type and its variants](./type-interval/) diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/_index.md deleted file mode 100644 index 60efd63ee41e..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/_index.md +++ /dev/null @@ -1,214 +0,0 @@ ---- -title: The interval data type [YSQL] -headerTitle: The interval data type -linkTitle: Interval data type -description: The semantics of the interval and data type and its variants. 
[YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: type-interval - parent: date-time-data-types-semantics - weight: 40 -type: indexpage -showRightNav: true ---- - -## Why does the interval data type exist? - -Briefly, and trivially, the _interval_ data type exists because the SQL Standard prescribes it. Of course, it does this for a reason: because the semantics of _interval_ arithmetic is rather special and reflect real-world requirements that arise from the [difference between _clock-time-semantics_ and _calendar-time-semantics_](../../conceptual-background/#two-ways-of-conceiving-of-time-calendar-time-and-clock-time). - -### The SQL Standard prescribes support for the interval data type - -Try this: - -```plpgsql -drop function if exists f() cascade; - -create function f() - returns table(t text) - language plpgsql -as $body$ -declare - d1 constant date := '2021-01-13'; - d2 constant date := '2021-02-17'; - - t1 constant time := '13:23:17.000000'; - t2 constant time := '15:37:43.123456'; - - ts1 constant timestamp := '2021-01-13 13:23:17.000000'; - ts2 constant timestamp := '2021-02-17 15:37:43.123456'; - - tstz1 constant timestamptz := '2021-01-13 13:23:17.000000 +04:00'; - tstz2 constant timestamptz := '2021-02-17 15:37:43.123456 -01:00'; -begin - t := 'date: '||(pg_typeof(d2 - d1 ))::text; return next; - t := ''; return next; - t := 'time: '||(pg_typeof(t2 - t1 ))::text; return next; - t := 'timestamp: '||(pg_typeof(ts2 - ts1 ))::text; return next; - t := 'timestamptz: '||(pg_typeof(tstz2 - tstz1))::text; return next; -end; -$body$; - -select t from f(); -``` - -This is the result: - -```output - date: integer - - time: interval - timestamp: interval - timestamptz: interval -``` - -Subtraction isn't supported for _timetz_ values—yet another reason not to use that data type. - -Subtracting _date_ values produces an _int_ value: the number of days between them. In contrast, subtracting _time_, _timestamp_ , and _timestamptz_ values produces an _interval_ value. The SQL Standard prescribes this outcome for these newer data types but not for the earlier _date_ data type. - -### Interval arithmetic semantics - -Try this to see the actual _interval_ value that subtracting _timestamptz_ values produces: - -```plpgsql -select - ( - '2020-03-10 13:47:19.7':: timestamp - - '2020-03-10 12:31:13.5':: timestamp) ::text as "interval 1", - ( - '2020-03-10 00:00:00':: timestamp - - '2020-02-10 00:00:00':: timestamp) ::text as "interval 2"; -``` - -This is the result: - -```output - interval 1 | interval 2 -------------+------------ - 01:16:06.2 | 29 days -``` - -The section [How does YSQL represent an _interval_ value?](./interval-representation/) explains that this _text_ display is not the visualization of just a scalar number of seconds; rather, an _interval_ value is represented as a three-field _[mm, dd, ss]_ tuple. (The first two fields are integers and the last represents a real number of seconds with microsecond precision.) And it explains the reasoning behind this design. The story is complemented by the examples, and the explanations of what they show, in the section [_Interval_ arithmetic](./interval-arithmetic/). - -- The _seconds_ component is externalized as an integral number of _hours_, an integral number of minutes, and a real number of _seconds_. -- The _days_ component is externalized as an integral number of _days_. -- And the _months_ component is externalized as an integral number of _years_ and an integral number of _months_. 
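-
-Here is a small illustrative query (the literal is arbitrarily chosen) that shows these externalization rules at work. The internal representation of this value is the tuple _[mm=27, dd=700, ss=100000]_:
-
-```plpgsql
-select '27 months 700 days 100000 seconds'::interval;
-```
-
-You can expect a result like this:
-
-```output
- 2 years 3 mons 700 days 27:46:40
-```
-
-Notice that each field is externalized independently: the _700 days_ are not normalized into months, and the _100000 seconds_ are shown as _27:46:40_ rather than being normalized into days.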
- -Briefly, the rules for adding or subtracting an _interval_ value to a _timestamptz_ value are different when the value defines a non-zero value for only the _seconds_ component, only the _days_ component, or only the _months_ component. The rule differences are rooted in convention. (The rules are therefore complex when an _interval_ value has more than one non-zero component—so complex that it's very difficult to state requirements that imply such hybrid _interval_ values, and to implement application code that meets such requirements reliably.) - -Here is a sufficient example to illustrate the conceptual difficulty. First, try this: - -```plpgsql -select ('1 day'::interval = '24 hours'::interval)::text; -``` - -The result is _true_. (The implementation of the _interval-interval_ overload of the `=` operator is explained and discussed in the section [Comparing two _interval_ values](./interval-arithmetic/interval-interval-comparison/).) - -Now try this: - -```plpgsql -drop function if exists dd_versus_ss() cascade; - -create function dd_versus_ss() - returns table(x text) - language plpgsql -as $body$ -begin - set timezone = 'America/Los_Angeles'; - declare - i_1_day constant interval := '1 day'; - i_24_hours constant interval := '24 hours'; - - -- Just before DST Starts at 02:00 on Sunday 14-Mar-2021. - t0 constant timestamptz := '2021-03-13 20:00:00'; - - t0_plus_1_day constant timestamptz := t0 + i_1_day; - t0_plus_24_hours constant timestamptz := t0 + i_24_hours; - begin - x := 't0 + ''1 day'': '||t0_plus_1_day ::text; return next; - x := 't0 + ''24 hours'': '||t0_plus_24_hours ::text; return next; - end; -end; -$body$; - -select x from dd_versus_ss(); -``` - -This is the result: - -```output - t0 + '1 day': 2021-03-14 20:00:00-07 - t0 + '24 hours': 2021-03-14 21:00:00-07 -``` - -How can it be that, while _'1 day'_ is equal to _'24 hours'_, _t0 + '1 day'_ is _not_ equal to _t0 + '24 hours'_? The short answer, of course, is that _'1 day'_ is _not_ equal to _'24 hours'_ when _interval_ equality is defined strictly. The native _interval-interval_ overload of the `=` operator implements only a loose notion of _interval_ equality. You also need a _strict_ _interval_ equality notion. The section [The "strict equals" operator](./interval-arithmetic/interval-interval-comparison/#the-strict-equals-interval-interval-operator) shows you how to do this. - -In the present contrived but crucial example, _t0_ is just before the "spring forward" moment in the _America/Los_Angeles_ timezone. And the loosely, but not strictly, equal durations of _'1 day'_ and _'24 hours'_ are both long enough to take you from _Pacific Standard Time_ to _Pacific Daylight Savings Time_. Bearing in mind the _[\[mm, dd, ss\]](./interval-representation/)_ internal representation, you can immediately see this: - -- The semantics of _interval_ arithmetic is different for the _dd_ field of the internal representation than for the _ss_ field. - -This does reflect convention. Are you postponing an appointment by one day? Here you expect the re-scheduled appointment to be at the same time on the next day, whether or not a start or end of Daylight Savings Time intervenes. Or are you making a journey (like a flight) that you know takes twenty-four hours? Here, whether or not a start or end of Daylight Savings Time occurs during the flight crucially affects the arrival time. 
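-
-As a complementary, illustrative check (re-using the starting moment from the test above), you can repeat the same two additions with the session timezone set to _UTC_, which never observes Daylight Savings Time. Both additions then produce the same moment, emphasizing that the divergence shown above is entirely a Daylight Savings Time effect:
-
-```plpgsql
-set timezone = 'UTC';
-select
-  ('2021-03-13 20:00:00'::timestamptz + '1 day'::interval)    as "t0 + '1 day'",
-  ('2021-03-13 20:00:00'::timestamptz + '24 hours'::interval) as "t0 + '24 hours'";
-```
-
-You can expect both columns to show the same value:
-
-```output
-      t0 + '1 day'      |    t0 + '24 hours'
-------------------------+------------------------
- 2021-03-14 20:00:00+00 | 2021-03-14 20:00:00+00
-```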
- -A similar contrived test that uses _interval_ values of _'1 month'_ and _'30 days'_ with a starting moment just before the last day of February in a leap year shows this: - -- The semantics of _interval_ arithmetic is different for the _mm_ field than for the _dd_ field. - -Try this: - -```plpgsql -select ('1 month'::interval = '30 days'::interval)::text; -``` - -The result is _true_. Now try this: - -```plpgsql -drop function if exists mm_versus_dd() cascade; - -create function mm_versus_dd() - returns table(x text) - language plpgsql -as $body$ -begin - set timezone = 'UTC'; - declare - i_1_month constant interval := '1 month'; - i_30_days constant interval := '30 days'; - - -- Just before 29-Feb in a leap year. - t0 constant timestamptz := '2020-02-26 12:00:00'; - - t0_plus_30_days constant timestamptz := t0 + i_30_days; - t0_plus_1_month constant timestamptz := t0 + i_1_month; - begin - x := 't0 + 1 month: '||t0_plus_1_month ::text; return next; - x := 't0 + 30 days: '||t0_plus_30_days ::text; return next; - end; -end; -$body$; - -select x from mm_versus_dd(); -``` - -This is the result: - -```output - t0 + 1 month: 2020-03-26 12:00:00+00 - t0 + 30 days: 2020-03-27 12:00:00+00 -``` - -This outcome, too, does reflect convention. Are you setting up reminders to tell you to water your hardy succulents every month? Here, you simply want a reminder on, say, the 10th of each calendar month. Or are you taking a package tour (starting, say, mid-week) that is advertised to last thirty days? Here, whether or not 29-February in a leap year occurs during the vacation affects when you get back home. - -Everything that explains these differing semantics, and the philosophy of why they should differ, is explained in the section [_Interval_ arithmetic](interval-arithmetic) and its child pages. - -## Organization of the rest of the interval section - -The notions that the account of the _interval_ data type explains are interdependent. The ordering of the following subsections aims to introduce the notions with the minimum dependency on notions yet to be introduced. The account is split into the following main subsections: - -- [How does YSQL represent an _interval_ value?](./interval-representation/) -- [_Interval_ value limits](interval-limits) -- [Declaring _intervals_](declaring-intervals) -- [_Interval_ arithmetic](interval-arithmetic) -- [Defining and using custom domain types to specialize the native _interval_ functionality](custom-interval-domains) -- [User-defined _interval_ utility functions](interval-utilities) - -See the generic section [Typecasting between date-time values and text values](../../typecasting-between-date-time-and-text/) for the account of the ways to construct and to read values for all of the _date-time_ data types—including, therefore, the _interval_ data type. diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/_index.md deleted file mode 100644 index 5119c3050fb6..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-arithmetic/_index.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: Interval arithmetic [YSQL] -headerTitle: Interval arithmetic -linkTitle: Interval arithmetic -description: Explains the semantics of timestamp-interval arithmetic and interval-only arithmetic. 
[YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: interval-arithmetic - parent: type-interval - weight: 40 -type: indexpage -showRightNav: true ---- - -This section uses the term "moment" as an umbrella for a _timestamptz_ value, a _timestamp_ value, or a _time_ value. (In a broader scenario, a _date_ value is also a moment. But you get an _integer_ value when you subtract one _date_ value from another. And you cannot add or subtract an _interval_ value to/from a _date_ value.) The term "_interval_ arithmetic" is used somewhat loosely to denote these three distinct scenarios: - -- **[The interval-interval overload of the "=" operator](#the-interval-interval-overload-of-the-operator)**—_interval-interval_ equality. This is what the term implies. - -- **[Interval-only addition/subtraction and multiplication/division](#interval-only-addition-subtraction-and-multiplication-division):** This has two subtopics: - - - _First_ [The _interval-interval_ overloads of the "+" and "-" operators](#the-interval-interval-overloads-of-the-and-operators) to produce a new _interval_ value. - - - And _second_ [The _interval_-number overloads of the "*" and "/" operators](#the-interval-number-overloads-of-the-and-operators) to produce a new _interval_ value. - -- **[moment-interval arithmetic](#moment-interval-arithmetic):** This has two subtopics: - - - _First_ [The moment-moment overloads of the "-" operator](#the-moment-moment-overloads-of-the-operator) to produce an _interval_ value. (Addition of two moments is meaningless and is therefore illegal. So is multiplication or division of a moment by a number.) - - - And _second_ [The moment-interval overloads of the "+" and "-" operators](#the-moment-interval-overloads-of-the-and-operators) to produce a new moment of the same data type. - -You need to understand the notions that the section [Two ways of conceiving of time: calendar-time and clock-time](../../../conceptual-background/#two-ways-of-conceiving-of-time-calendar-time-and-clock-time) addresses in order to understand the code and the explanations in this page's child page [The moment-_interval_ overloads of the "+" and "-" operators for _timestamptz_, _timestamp_, and _time_](./moment-interval-overloads-of-plus-and-minus/). The notions help you understand how the semantic rules of the native [moment-moment overloads of the "-" operator for timestamptz, timestamp, and time](./moment-moment-overloads-of-minus/) are ultimately confusing and therefore unhelpful—and why you should therefore adopt the practices that the section [Custom domain types for specializing the native _interval_ functionality](../custom-interval-domains/) explains. The distinction between _clock-time-semantics_ and _calendar-time-semantics_ is only implicitly relevant for the notions that the sections [The interval-interval overload of the "=" operator](#the-interval-interval-overload-of-the-operator) and [Interval-only addition/subtraction and multiplication/division](#interval-only-addition-subtraction-and-multiplication-division) explain. - -The PostgreSQL documentation does not carefully specify the semantics of _interval_ arithmetic. This page and its children aim to specify the operations in terms of the individual fields of the [internal representation](../interval-representation/). - -{{< note title="The results of 'interval' arithmetic are, in general, sensitive to the session's timezone." 
>}} -More carefully stated, the results of some moment-_interval_ arithmetic operations (the moment-moment overloads of the `-` operator and the moment-_interval_ overloads of the `+` and `-` operators) are sensitive to the session's _TimeZone_ setting when the moments are _timestamptz_ values. -{{< /note >}} - -## The interval-interval overload of the "=" operator - -Try this: - -```plpgsql -select - ( - '5 days 1 hours'::interval = - '4 days 25 hours'::interval - )::text as "'1 day' is defined to be equal to '24 hours'", - ( - '5 months 1 day' ::interval = - '4 months 31 days'::interval - )::text as "'1 month' is defined to be equal to '30 days'"; -``` - -The result for each equality expression is _true_. This is a strange definition of equality because there are _29 days_ in February in a leap year and otherwise _28 days_, there are four _30 day_ months, and there are seven _31 day_ months. Further, _1 day_ is only _usually_ _24 hours_. (The _usually_ caveat acknowledges the consequences of Daylight Savings Time changes.) All this crucially effects the semantics—see [_interval_-moment arithmetic](#moment-interval-arithmetic) below. - -The section [Comparing two _interval_ values](./interval-interval-comparison/) explains the model that the _interval_ overload of the `=` operator uses and tests it with a PL/pgSQL implementation. It also shows how to implement a [user-defined _interval-interval_ `==` operator](../interval-utilities#the-user-defined-strict-equals-interval-interval-operator) that implements _strict equality_. The criterion for this is that each field of the LHS and RHS _[\[mm, dd, ss\]](../interval-representation/)_ internal representations must be pairwise equal. - -## Interval-only addition/subtraction and multiplication/division - -Empirical tests show the following: - -- The `+` operator and the `-` operator are overloaded to allow the addition and subtraction of two _interval_ values. Here, the outcome _can_ be understood in terms of pairwise field-by-field addition or subtraction of the two _[mm, dd, ss]_ tuples. - -- The `*` operator and the `/` operator are overloaded to allow multiplication or division of an _interval_ value by a real or integer number. Here, the outcome can be _mainly_ understood in terms of multiplying, or dividing, the _[mm, dd, ss]_ tuple, field-by-field, using the same factor. Notice the caveat _mainly_. In some rare corner cases, the model holds only when the forgiving built-in _interval-interval_ `=` operator is used to compare the outcome of the model with that of the actual functionality. When the [user-defined _strict equality_ _interval-interval_ `==`operator](../interval-utilities#the-user-defined-strict-equals-interval-interval-operator) is used, the tests show that, in these corner cases, the outcome of the model does _not_ agree with that of the actual functionality. - -In all cases of addition/subtraction and multiplication/division, the model assumes that a new intermediate _[mm, dd, ss]_ tuple is produced and that each of the _mm_ or _dd_ fields might well be real numbers. It must be assumed that this intermediate value is then coerced into the required _[integer, integer, real number]_ format using the same algorithm (see the section [Modeling the internal representation and comparing the model with the actual implementation](../interval-representation/internal-representation-model/)) that is used when such a tuple is provided in the _::interval_ typecast approach. 
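-
-Here is a minimal illustration of that coercion model, using an arbitrarily chosen non-integral factor. The intermediate tuple is _[1.5, 0, 0]_; the fractional _0.5 months_ then spills down into the _days_ field as _15 days_:
-
-```plpgsql
-select ('1 month'::interval * 1.5)::text;
-```
-
-You can expect this result:
-
-```output
- 1 mon 15 days
-```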
- -### The interval-interval overloads of the "+" and "-" operators - -The operation acts separately on the three individual fields of the [internal representation](../interval-representation/) adding or subtracting them pairwise: - -- _[mm1, dd1, ss1] ± [mm2, dd2, ss2] = [(mm1 ± mm2), (dd1 ± dd2), (ss1 ± ss2)]_ - -The section [Adding or subtracting a pair of _interval_ values](./interval-interval-addition/) simulates and tests the model for how this works in PL/pgSQL code. - -Try this simple test: - -```plpgsql -select '2 months'::interval + '2 days'::interval; -``` - -This is the result: - -```output - 2 mons 2 days -``` - -This is consistent with the assumed model. And it shows that a practice that the user might adopt to use only _interval_ values that have just a single non-zero internal representation field can easily be thwarted by _interval-interval_ addition or subtraction. - -### The interval-number overloads of the "*" and "/" operators - -The operation is assumed to be intended to act separately on each of the three individual fields: - -- _[mm, dd, ss]\*x = [mm\*x, dd\*x, ss*x]_ - -When _x_ is equal to _f_, where _f > 1_, the effect is multiplication by _f_. And when _x_ is equal to _1/f_, where _f > 1_, the effect is division by _f_. Therefore a single mental model explains both operations. - -Try this positive test: - -```plpgsql -select - '2 months 2 days'::interval*0.9 as "result 1", - '2 months'::interval*0.9 + '2 days'::interval*0.9 as "result 2"; -``` - -This is the result: - -```output - result 1 | result 2 -------------------------+------------------------ - 1 mon 25 days 19:12:00 | 1 mon 25 days 19:12:00 -``` - -It _is_ consistent with the assumed model. - -Now try this negative test: - -```plpgsql -select - '2 months 2 days'::interval*0.97 as "result 1", - '2 months'::interval*0.97 + '2 days'::interval*0.97 as "result 1"; -``` - -This is the result: - -```output - result 1 | result 1 -------------------------+------------------------ - 1 mon 30 days 03:21:36 | 1 mon 29 days 27:21:36 -``` - -It is _not_ consistent with the assumed model. But the only difference between the positive test and the negative test is that the former uses the factor _0.9_ and the latter uses the factor _0.97_. - -Compare the apparently different results using the forgiving native _interval-interval_ '=' operator like this: - -```plpgsql -select ('1 mon 30 days 03:21:36'::interval = '1 mon 29 days 27:21:36'::interval)::text; -``` - -The result is _true_. The section [Multiplying or dividing an _interval_ value by a number](./interval-number-multiplication/) simulates and tests the model for how this works in PL/pgSQL code, and examines this unexpected outcome closely. - -One thing, at least, is clear: a practice that the user might adopt to use only _interval_ values that have just a single non-zero internal representation field can easily be thwarted by _interval-number_ multiplication or division. Moreover, the semantics of these operations is not documented and cannot be reliably determined by empirical investigation. The outcomes must, therefore, be considered to be unpredictable. - -## Recommendation - -{{< tip title="Avoid native 'interval'-'interval' addition/subtraction and 'interval'-number multiplication/division." >}} -Yugabyte recommends that you avoid performing operations whose results can easily thwart an adopted principle for good practice and especially that you avoid operations whose outcomes must be considered to be unpredictable. 
It recommends that instead you adopt the practice that the section [Defining and using custom domain types to specialize the native _interval_ functionality](../custom-interval-domains/) explains. Doing this will let you perform the addition, subtraction, multiplication, and division operations that are unsafe with native _interval_ values in a controlled fashion that brings safety. -{{< /tip >}} - -## Moment-interval arithmetic - -The `-` operator has a set of moment-moment overloads and a set of moment-_interval_ overloads. The `+` operator has a set of -_interval_-moment overloads. The `+` operator has no moment-moment overloads. (This operation would make no sense.) - -### The moment-moment overloads of the "-" operator - -The `-` operator has an overload for each pair of operands of the _timestamptz_, _timestamp_, and _time_ data types. The result of subtracting two _date_ values has data type _integer_. Try this: - -```plpgsql -drop function if exists f() cascade; -create function f() - returns table(t text) - language plpgsql -as $body$ -declare - d1 constant date := '2021-01-13'; - d2 constant date := '2021-02-17'; - - t1 constant time := '13:23:17'; - t2 constant time := '15:37:43'; - - ts1 constant timestamp := '2021-01-13 13:23:17'; - ts2 constant timestamp := '2021-02-17 15:37:43'; - - tstz1 constant timestamptz := '2021-01-13 13:23:17 +04:00'; - tstz2 constant timestamptz := '2021-02-17 15:37:43 -01:00'; - -begin - t := 'date: '||(pg_typeof(d2 - d1 ))::text; return next; - t := 'time: '||(pg_typeof(t2 - t1 ))::text; return next; - t := 'timestamp: '||(pg_typeof(ts2 - ts1 ))::text; return next; - t := 'timestamptz: '||(pg_typeof(tstz2 - tstz1))::text; return next; -end; -$body$; - -select t from f(); -``` - -This is the result: - -```output - date: integer - time: interval - timestamp: interval - timestamptz: interval -``` - -The _interval_ value that results from subtracting one moment from another (for the _timestamptz_, _timestamp_, or _time_ data types) has, in general, a non-zero value for each of the _dd_ and _ss_ fields of the internal _[\[mm, dd, ss\]](../interval-representation/)_ representation. The value of the _mm_ field is _always_ zero. The section [The moment-moment overloads of the "-" operator for _timestamptz_, _timestamp_, and _time_](./moment-moment-overloads-of-minus/) explains the algorithm that produces the value and shows that, because it has two fields that have different rules for the semantics of the _interval_-moment overloads of the `+` and `-` operators, this approach for producing an _interval_ value should be avoided. See the section [Custom domain types for specializing the native _interval_ functionality](../custom-interval-domains/) for the recommended alternative approach. - -### The moment-interval overloads of the "+" and "-" operators - -The `+` and `-` operators have overloads for each pair of operands of each of the _timestamptz_, _timestamp_, and _time_ data types with an _interval_ operand. The notions that the section [Two ways of conceiving of time: calendar-time and clock-time](../../../conceptual-background/#two-ways-of-conceiving-of-time-calendar-time-and-clock-time) addresses are critical for the understanding of this functionality. The topic is explained carefully in the child page [The moment-_interval_ overloads of the "+" and "-" operators for _timestamptz_, _timestamp_, and _time_](./moment-interval-overloads-of-plus-and-minus/). 
This test is copied from that page: - -```plpgsql -select ( - '30 days '::interval = '720 hours'::interval and - ' 1 month'::interval = ' 30 days '::interval and - ' 1 month'::interval = '720 hours'::interval - )::text; -``` - -The result is _true_, showing that (at least in an inexact sense), the three spellings _'720 hours'_, _'30 days'_, and '_1 month_' all denote the same _interval_ value. Critically, though, _'720 hours'_ is a _clock-time-semantics_ notion while _'30 days'_ and '_1 month_' are each _calendar-time-semantics_ notions, though in subtly different ways. This explains the outcome of this test—again, copied from (but in a simplified form) the child page: - -```plpgsql -drop table if exists t; -create table t( - t0 timestamptz primary key, - "t0 + 720 hours" timestamptz, - "t0 + 30 days" timestamptz, - "t0 + 1 month" timestamptz); - -insert into t(t0) values ('2021-02-19 12:00:00 America/Los_Angeles'); - -set timezone = 'America/Los_Angeles'; - -update t set - "t0 + 720 hours" = t0 + '720 hours' ::interval, - "t0 + 30 days" = t0 + '30 days' ::interval, - "t0 + 1 month" = t0 + '1 month' ::interval; - -select - t0, - "t0 + 720 hours", - "t0 + 30 days", - "t0 + 1 month" -from t; -``` - -This is the result: - -```output - t0 | t0 + 720 hours | t0 + 30 days | t0 + 1 month -------------------------+------------------------+------------------------+------------------------ - 2021-02-19 12:00:00-08 | 2021-03-21 13:00:00-07 | 2021-03-21 12:00:00-07 | 2021-03-19 12:00:00-07 -``` - -The fact that adding the (inexactly) "same" value produces three different results motivates the careful, and rather tricky, discussion of _clock-time_ and the two sub-flavors of _calendar-time_ (days versus months and years). diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-representation/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-representation/_index.md deleted file mode 100644 index 21918af9f09a..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/date-time-data-types-semantics/type-interval/interval-representation/_index.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: The internal representation of an interval value [YSQL] -headerTitle: How does YSQL represent an interval value? -linkTitle: Interval representation -description: Explains how interval value is represented internally as three fields (months, days, and seconds). [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: interval-representation - parent: type-interval - weight: 10 -type: indexpage -showRightNav: true ---- - -{{< tip title="Download and install the date-time utilities code." >}} -The code on this page and on its child, [Modeling the internal representation and comparing the model with the actual implementation](./internal-representation-model/), depends on the code presented in the section [User-defined _interval_ utility functions](../interval-utilities/). This is included in the larger [code kit](../../../download-date-time-utilities/) that includes all of the reusable code that the overall _[date-time](../../../../type_datetime/)_ section describes and uses. -{{< /tip >}} - -The PostgreSQL documentation, under the table [Interval Input](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT-EXAMPLES), says this: - -> Internally, _interval_ values are stored as months, days, and seconds. 
This is done because the number of days in a month varies, and a day can have 23 or 25 hours if a Daylight Savings Time adjustment is involved. The months and days fields are integers while the seconds field can store fractions. Because intervals are usually created from constant strings or timestamp subtraction, this storage method works well in most cases, but can cause unexpected results. - -Inspection of the C code of the implementation shows that the _mm_ and _dd_ fields of the _[mm, dd, ss]_ internal implementation tuple are four-byte integers. The _ss_ field is an eight-byte integer that records the value in microseconds. - -The reference to Daylight Savings Time is a nod to the critical distinction between [_clock-time-semantics_](../../../conceptual-background/#clock-time) and [_calendar-time-semantics_](../../../conceptual-background/#calendar-time). Notice the use of "unexpected". It is better to say that your ability confidently to predict the outcome of _interval_ arithmetic rests on a relatively elaborate mental model. This model has two complementary parts: - -- How the values of the three fields of the _[mm, dd, ss]_ representation of an _interval_ value are computed when an _interval_ value is created. The present _"How does YSQL represent an interval value?"_ section addresses this. - -- The different semantics of these three fields when an _interval_ value is added or subtracted to/from a _timestamptz_ value, a _timestamp_ value, or a _time_ value or when an _interval_ value is created by subtracting one moment (typically a plain _timestamp_ value or a _timestamptz_value_) from another. This is addressed in the section [_Interval_ arithmetic](../interval-arithmetic/). - -As long as you have a robust mental model, then your results will not be unexpected. This section explains the mental model for _interval_ value creation. It enables you to predict what values for _months_, _days_, and _seconds_ will be represented internally when you specify an _interval_ value using values for _years_, _months_, _days_, _hours_, _minutes_, and _seconds_. And it enables you to predict what values for _years_, _months_, _days_, _hours_, _minutes_, and _seconds_ you will read back from an _interval_ value whose _months_, _days_, and _seconds_ values you have managed to predict. - -The value recorded by each of the three fields of the representation can be arbitrarily large with respect to the conventions that say, for example, that _25 hours_ is _1 day_ and _1 hour_. For example, this tuple is allowed: _99 months 700 days 926351.522816 seconds_. (Of course, the physical internal representation does impose some limits. See the section [_interval_ value limits](../interval-limits/).) - -**Note:** The internal sixteen-byte format of the internal _[mm, dd, ss]_ representation of an _interval_ value determines the theoretical upper limits on the values of each of the three fields. Other factors determine the actual limits. This is explained in the section [_interval_ value limits](../interval-limits/). - -## Ad hoc examples - -There are no built-in functions or operators that let you display the _months_, _days_, and _seconds_ "as is" from the internal representation. Rather, you can display only canonically derived values for _years_, _months_, _days_, _hours_, _minutes_, and _seconds_. The rule for extracting these values from the internal representation is simple and intuitive. 
It is presented as executable PL/pgSQL in the implementation of the function [_parameterization (interval_mm_dd_ss_t)_](../interval-utilities/#function-parameterization-interval-mm-dd-ss-t-returns-interval-parameterization-t) in the section [User-defined _interval_ utility functions](../interval-utilities/). Briefly, the internal integral _months_ value is displayed as integral _years_ and integral _months_ by taking one _year_ to be 12 _months_; the internal integral _days_ value is displayed "as is"; and the real number internal _seconds_ is displayed as integral _hours_, integral _minutes_, and real number _seconds_ by taking one _hour_ to be _60 minutes_ and _one minute_ to be _60 seconds_. - -The section [Ad hoc examples of defining _interval_ values](./ad-hoc-examples/) provides six examples that give a flavor of the complexity of the rules. - -## Modeling the internal representation and comparing the model with the actual implementation - -The best way to express a statement of the rules that are consistent with the outcomes of the six [Ad hoc examples of defining _interval_ values](./ad-hoc-examples/), and any number of other examples that you might try, is to implement an executable simulation and to compare its outputs with the outputs that the actual PostgreSQL, and therefore YSQL, implementations produce. - -**Note:** If you follow the recommendations made below, you can simply skip attempting to understand these tricky rules without sacrificing any useful functionality. - -The section [Modeling the internal representation and comparing the model with the actual implementation](./internal-representation-model/) presents this. Here is the algorithm, copied from the body of [function interval_mm_dd_ss (interval_parameterization_t)](./internal-representation-model/#function-interval-mm-dd-ss-interval-parameterization-t-returns-interval-mm-dd-ss-t): - -```output --- The input values are "p.yy", "p.mm", "p.dd", "p.hh", "p.mi", and "p.ss" — i.e. the --- conventional parameterization of an "interval" value used by the "::interval" typecast --- and the "make_interval()" approaches. - --- The output values are "mm_out", "dd_out", and "ss_out" — i.e. the fields of the internal --- representation tuple. - --- "mm_per_yy", "dd_per_mm", "ss_per_dd", "ss_per_hh", and "ss_per_mi" are constants --- with the meanings that the mnemonics suggest: the number of months in a year, --- and so on. -``` - -```output -mm_trunc constant int not null := trunc(p.mm); -mm_remainder constant double precision not null := p.mm - mm_trunc::double precision; - --- This is a quirk. -mm_out constant int not null := trunc(p.yy*mm_per_yy) + mm_trunc; - -dd_real_from_mm constant double precision not null := mm_remainder*dd_per_mm; - -dd_int_from_mm constant int not null := trunc(dd_real_from_mm); -dd_remainder_from_mm constant double precision not null := dd_real_from_mm - dd_int_from_mm::double precision; - -dd_int_from_user constant int not null := trunc(p.dd); -dd_remainder_from_user constant double precision not null := p.dd - dd_int_from_user::double precision; - -dd_out constant int not null := dd_int_from_mm + dd_int_from_user; - -d_remainder constant double precision not null := dd_remainder_from_mm + dd_remainder_from_user; - -ss_out constant double precision not null := d_remainder*ss_per_dd + - p.hh*ss_per_hh + - p.mi*ss_per_mi + - p.ss; -``` - -{{< tip title="The algorithm is too hard to remember and produces unhelpful outcomes." 
>}} -Yugabyte staff members have carefully considered the rules that this algorithm expresses. They have the property that when non-integral values are used in the _::interval_ typecast approach, even a literal that specifies, for example, only months can result in an internal _[mm, dd, ss]_ tuple where each of the fields is non-zero. Try this: - -```plpgsql -select interval_mm_dd_ss('11.674523 months '::interval)::text; -``` - -(The function [interval_mm_dd_ss (interval)](../interval-utilities/#function-interval-mm-dd-ss-interval-returns-interval-mm-dd-ss-t) is defined in the section [User-defined _interval_ utility functions](../interval-utilities/). This is the result: - -```output - (11,20,20363.616) -``` - -The section [Interval arithmetic](../interval-arithmetic/) explains that the semantics is critically different for each of the internal representation's fields. It recommends that you use only _interval_ values where just one of the three fields is non-zero. The section [Custom domain types for specializing the native _interval_ functionality](../custom-interval-domains/) shows how to impose this discipline programmatically. -{{< /tip >}} - -## Possible upcoming implementation change - -{{< tip title="Heads up." >}} -There has been some discussion on the [pgsql-general](mailto:pgsql-general@lists.postgresql.org) and [pgsql-hackers](mailto:pgsql-hackers@lists.postgresql.org) mail lists about the algorithm whose implementation that the function _interval()_ documents. As a result, a patch has been developed for a future version of the PostgreSQL system that makes some subtle changes to the "spill-down" behavior in response to real number input values for _years_, _months_, _days_, _hours_, and _minutes_ when you use the _::interval_ typecast approach to construct an _interval_ value. When YugabyteDB adopts this patch, the implementation of the function [interval_mm_dd_ss (interval_parameterization_t)](./internal-representation-model/#function-interval-mm-dd-ss-interval-parameterization-t-returns-interval-mm-dd-ss-t) will be changed accordingly. - -If you follow Yugabyte's recommendation to construct _interval_ values using only integral values for _years_, _months_, _days_, _hours_, and _minutes_ (or, equivalently, always to use the _make_interval()_ SQL built-in function rather than the _::interval_ typecast approach), then your application code will not see a behavior change when you move to a version of YugabyteDB that implements this patch. As mentioned above, the section [Custom domain types for specializing the native _interval_ functionality](../custom-interval-domains/) shows how to impose this discipline programmatically. -{{< /tip >}} diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/functions/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/functions/_index.md deleted file mode 100644 index a16f5ecf34b6..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/functions/_index.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: General-purpose date and time functions [YSQL] -headerTitle: General-purpose date and time functions -linkTitle: General-purpose functions -description: Describes the general-purpose date and time functions. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: date-time-functions - parent: api-ysql-datatypes-datetime - weight: 90 -type: indexpage -showRightNav: true ---- - -This page lists all of the general-purpose _date-time_ functions. 
They are classified into groups according to the purpose. - -- [Creating date-time values](#functions-for-creating-date-time-values) -- [Manipulating date-time values](#functions-for-manipulating-date-time-values) -- [Current date-time moment](#functions-that-return-the-current-date-time-moment) -- [Delaying execution](#functions-for-delaying-execution) -- [Miscellaneous](#miscellaneous-functions) - -Notice that the so-called _date-time_ formatting functions, like: - -- _to_date()_ or _to_timestamp()_, that convert a _text_ value to a _date-time_ value - -- and to _char()_, that converts a _date-time_ value to a _text_ value - -are described in the dedicated [Date and time formatting functions](../formatting-functions/) section. - -{{< note title="Functions without trailing parentheses" >}} -Normally in PostgreSQL, and therefore in YSQL, a function invocation must be written with trailing parentheses—even when the invocation doesn't specify any actual arguments. These five date-time functions are exceptions to that rule: - -- _current_date_, _current_time_, _current_timestamp_, _localtime_, and _localtimestamp_. - -Notice that the \\_df_ meta-command produces no output for each of these five functions. - -Each of these is in the group [functions that return the current date-time moment](#functions-that-return-the-current-date-time-moment-current-date-time-moment). If you invoke one of these using empty trailing parentheses, then you get the generic _42601_ syntax error. Each of these five names is reserved in SQL. For example, if you try to create a table with a column whose name is one of these five (without trailing parentheses in this case, of course), then you get the same _42601_ error. Notice that within this set of five exceptional functions that must not be invoked with empty trailing parentheses, these four have a variant that has a single _precision_ parameter: _current_time(precision)_, _current_timestamp(precision)_, _localtime(precision)_, and _localtimestamp(precision)_. This specifies the precision of the seconds value. (This explains why _current_date_ has no _precision_ variant.) - -All of the other _date-time_ functions that this page lists must be written with trailing parentheses—conforming to the norm for function invocation. (Without trailing parentheses, it is taken as a name for a column in a user-created table or for a variable in PL/pgSQL. - -You should regard the exceptional status of the _current_date_, _current_time_, _current_timestamp_, _localtime_, and _localtimestamp_ _date-time_ functions simply as a quirk. There are other such quirky functions. See this note in the section [9.25. System Information Functions](https://www.postgresql.org/docs/15/functions-info.html) in the PostgreSQL documentation: - -> _current_catalog_, _current_role_, _current_schema_, _current_user_, _session_user_, and _user_ have special syntactic status [in the SQL Standard]: they must be called without trailing parentheses. In PostgreSQL, parentheses can optionally be used with _current_schema_, but not with the others. -{{< /note >}} - -The following tables list all of the general purpose _date_time_ built-in functions, classified by purpose. - -## Functions for creating date-time values - -**[Here](./creating-date-time-values)**. 
- -| | **return data type** | -| ----------------------------------------------------------------------------------------------- | -------------------- | -| [make_date()](./creating-date-time-values#function-make-date-returns-date) | date | -| [make_time()](./creating-date-time-values#function-make-time-returns-plain-time) | (plain) time | -| [make_timestamp()](./creating-date-time-values#function-make-timestamp-returns-plain-timestamp) | (plain) timestamp | -| [make_timestamptz()](./creating-date-time-values#function-make-timestamptz-returns-timestamptz) | timestamptz | -| [to_timestamp()](./creating-date-time-values#function-to-timestamp-returns-timestamptz) | timestamptz | -| [make_interval()](./creating-date-time-values#function-make-interval-returns-interval) | interval | - -## Functions for manipulating date-time values - -**[Here](./manipulating-date-time-values)**. - -| | **return data type** | -| ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | -| [date_trunc()](./manipulating-date-time-values#function-date-trunc-returns-plain-timestamp-timestamptz-interval) | plain timestamp \| timestamptz \| interval | -| [justify_days() \| justify_hours() \| justify_interval()](./manipulating-date-time-values#function-justify-days-justify-hours-justify-interval-returns-interval) | interval | - -## Functions that return the current date-time moment - -**[Here](./current-date-time-moment)**. - -There are several built-in SQL functions for returning the current date-time moment because there are different notions of currency: - -- right now at the instant of reading, independently of statements and transactions; -- as of the start of the current individual SQL statement within an on-going transaction; -- as of the start of the current transaction. - -| | **return data type** | **Moment kind** | -| ----------------------------------------------------------------------------------- | -------------------- | -------------------- | -| [current_date](./current-date-time-moment) | date | start of transaction | -| [localtime](./current-date-time-moment) | time | start of transaction | -| [current_time](./current-date-time-moment) | timetz | start of transaction | -| [localtimestamp](./current-date-time-moment) | plain timestamp | start of transaction | -| [transaction_timestamp() \| now() \| current_timestamp](./current-date-time-moment) | timestamptz | start of transaction | -| [statement_timestamp()](./current-date-time-moment) | timestamptz | start of statement | -| [clock_timestamp()](./current-date-time-moment) | timestamptz | instantaneous | -| [timeofday()](#avoid-timeofday) | text | instantaneous | - -Notice that _timeofday()_ has the identical effect to `to_char(clock_timestamp(),'Dy Mon dd hh24:mi:ss.us yyyy TZ')`. But notice that the use of plain _'Dy'_ and plain _'Mon'_, rather than _'TMDy'_ and _'TMMon'_, calls specifically for the English abbreviations—in other words, _timeofday()_ non-negotiably returns an English text value. - -Try this: - -```plpgsql --- Because "fmt" uses the plain forms "Dy" and "Mon", the test is insensitve to the value of "lc_time". --- Setting it here to Finnish simply emphasizes this point. 
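--- The procedure below captures clock_timestamp() and timeofday() pairwise in a single
--- SELECT and asserts that each timeofday() text value matches the corresponding
--- clock_timestamp() value formatted with the same template, "fmt".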
-set lc_time = 'fi_FI'; - -set timezone = 'America/Los_Angeles'; - -drop procedure if exists assert_timeofday_semantics() cascade; -create procedure assert_timeofday_semantics() - language plpgsql -as $body$ -declare - clk_1 timestamptz not null := clock_timestamp(); - clk_2 timestamptz not null := clk_1; - - tod_1 text not null := ''; - tod_2 text not null := ''; - dummy text not null := ''; - fmt constant text not null := 'Dy Mon dd hh24:mi:ss.us yyyy TZ'; -begin - select - clock_timestamp(), timeofday(), pg_sleep(2), clock_timestamp(), timeofday() into - clk_1, tod_1, dummy, clk_2, tod_2; - - assert tod_1 = to_char(clk_1, fmt), 'Assert #1 failed'; - assert tod_2 = to_char(clk_2, fmt), 'Assert #2 failed'; -end; -$body$; - -call assert_timeofday_semantics(); -``` - -Presumably, because it takes time to execute each individual PL/pgSQL statement, the moment values returned by the first calls to _clock_timestamp()_ and _timeofday()_, and then by the second calls to these two functions, will not be pairwise identical. However, they are the same to within a one microsecond precision. This is fortunate because it does away with the need to implement a tolerance notion and therefore simplifies the design of the test. - -{{< tip title="Don't use 'timeofday()'." >}} -Using _clock_timestamp()_, and formatting the result to _text_, can bring the identical result to using _timeofday()_—if this meets your requirement. However, you might well want a different formatting notion and might want to render day and month names or abbreviations in a language other than English. Moreover, you might want to do arithmetic with the moment value, for example by subtracting it from some other moment value. Yugabyte recommends, therefore, that you simply avoid ever using _timeofday()_ and, rather, always start with _clock_timestamp()_. - -For this reason, this section won't say any more about the _timeofday()_ builtin function. -{{< /tip >}} - -## Functions for delaying execution - -**[Here](./delaying-execution)**. - -| | **return data type** | -| ----------------------------------------------------------------------------- | -------------------- | -| [pg_sleep()](./delaying-execution#function-pg-sleep-returns-void) | void | -| [pg_sleep_for()](./delaying-execution#function-pg-sleep-for-returns-void) | void | -| [pg_sleep_until()](./delaying-execution#function-pg-sleep-until-returns-void) | void | - -## Miscellaneous functions - -**[Here](./miscellaneous/)**. 
- -| | **return data type** | -| ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | -| [isfinite()](./miscellaneous#function-isfinite-returns-boolean) | boolean | -| [age()](./miscellaneous#function-age-returns-interval) | interval | -| [extract() \| date_part()](./miscellaneous#function-extract-function-date-part-returns-double-precision) | double-precision | -| [timezone() \| at time zone operator](./miscellaneous#function-timezone-at-time-zone-operator-returns-timestamp-timestamptz) | time \| timetz \| timestamp \| timestamptz | -| [overlaps operator](./miscellaneous#overlaps-operator-returns-boolean) | boolean | diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/functions/miscellaneous/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/functions/miscellaneous/_index.md deleted file mode 100644 index e7241b11f923..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/functions/miscellaneous/_index.md +++ /dev/null @@ -1,599 +0,0 @@ ---- -title: Miscellaneous date-time functions [YSQL] -headerTitle: Miscellaneous date-time functions -linkTitle: Miscellaneous -description: The semantics of the miscellaneous date-time functions. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: miscellaneous - parent: date-time-functions - weight: 60 -type: indexpage -showRightNav: true ---- - -## function isfinite() returns boolean - -Here is the interesting part of the output from \\_df isfinite()_: - -```output - Result data type | Argument data types -------------------+----------------------------- - boolean | abstime - boolean | date - boolean | interval - boolean | timestamp with time zone - boolean | timestamp without time zone -``` - -The data type _abstime_ is for internal use only. It inevitably shows up in the \\_df_ output. But you should simply forget that it exists. - -Here's a trivial demonstration of the meaning of the function _isfinite()_: - -```plpgsql -do $body$ -begin - assert not isfinite( 'infinity'::timestamptz), 'Assert #1 failed'; - assert not isfinite('-infinity'::timestamptz), 'Assert #2 failed'; -end; -$body$; -``` - -The block finishes without error. - -## function age() returns interval - -Nominally, _age()_ returns the age of something "now" with respect to a date of birth. The value of "now" can be given: _either_ explicitly, using the two-parameter overload, as the invocation's first actual argument; _or_ implicitly, using the one-parameter overload, as _date_trunc('day', clock_timestamp())_. The value for the date of birth is given, for both overloads, as the invocation's last actual argument. Of course, this statement of purpose is circular because it avoids saying precisely how age is defined—and why a notion is needed that's different from what is given simply by subtracting the date of birth from "now", using the native minus operator, `-`. - -Here is the interesting part of the output from \\_df age()_. 
The rows were re-ordered manually and whitespace was manually added to improve the readability: - -```output - Result data type | Argument data types -------------------+---------------------------------------------------------- - interval | timestamp without time zone, timestamp without time zone - interval | timestamp with time zone, timestamp with time zone - - interval | timestamp without time zone - interval | timestamp with time zone -``` - -{{< note title="The 'xid' overload of 'age()' has nothing to do with date-time data types" >}} -There's an overload with _xid_ argument data type (and with _integer_ return). The present [Date and time data types](../../../type_datetime/) major section does not describe the _xid_ overload of _age()_. -{{< /note >}} - -This section first discusses age as a notion. Then it defines the semantics of the two-parameter overload of the built-in _age()_ function by modeling its implementation. The semantics of the one-parameter overload is defined trivially in terms of the semantics of the two-parameter overload. - -### The definition of age is a matter of convention - -Age is defined as the length of time that a person (or a pet, a tree, a car, a building, a civilization, the planet Earth, the Universe, or any phenomenon of interest) has lived (or has been in existence). Here is a plausible formula in the strict domain of date-time arithmetic: - -```output -age ◄— todays_date - date_of_birth -``` - -If _todays_date_ and _date_of_birth_ are _date_ values, then _age_ is produced as an _int_ value. And if _todays_date_ and _date_of_birth_ are plain _timestamp_ values (or _timestamptz_ values), then _age_ is produced as an _interval_ value. As long as the time-of-day component of each plain _timestamp_ value is exactly _00:00:00_ (and this is how people think of dates and ages) then only the _dd_ component of the internal _[\[mm, dd, ss\]](../../date-time-data-types-semantics/type-interval/interval-representation/)_ representation of the resulting _interval_ value will be non-zero. Try this: - -```plpgsql -drop function age_in_days(text, text); -create function age_in_days(today_text in text, dob_text in text) - returns table (z text) - language plpgsql -as $body$ -declare - d_today constant date not null := today_text; - d_dob constant date not null := dob_text; - t_today constant timestamp not null := today_text; - t_dob constant timestamp not null := dob_text; -begin - z := (d_today - d_dob)::text; return next; - z := (t_today - t_dob)::text; return next; -end; -$body$; - -select z from age_in_days('290000-08-17', '0999-01-04 BC'); -``` - -This is the result: - -```output - 106285063 - 106285063 days -``` - -{{< note title="The value of the 'dd' field has an upper limit of 109,203,124" >}} -See the subsection [procedure assert_interval_days_in_range (days in bigint)](../../date-time-data-types-semantics/type-interval/custom-interval-domains/#procedure-assert-interval-days-in-range-days-in-bigint) on the [Custom domain types for specializing the native interval functionality](../../date-time-data-types-semantics/type-interval/custom-interval-domains/) page. -{{< /note >}} - -However, how ages are stated is very much a matter of convention. Beyond, say, one's mid teens, it is given simply as an integral number of years. (Sue Townsend's novel title, "The Secret Diary of Adrian Mole, Aged 13 3/4", tells the reader that it's a humorous work and that Adrian is childish for his years.) The answer to "What is the age of the earth?" 
is usually given as "about 4.5 billion years"—and this formulation implies that a precision of about one hundred thousand years is appropriate. At the other end of the spectrum, the age of new born babies is usually given first as an integral number of days, and later, but while still a toddler, as an integral number of months. Internet search finds articles with titles like "Your toddler's developmental milestones at 18 months". You'll even hear age given as, say, "25 months". - -Internet search finds lots of formulas to calculate age in years—usually using spreadsheet arithmetic. It's easy to translate what they do into SQL primitives. The essential point of the formula is that if today's month-and-date is earlier in the year than the month-and-date of the date-of-birth, then you haven't yet reached your birthday. - - Try this: - -```plpgsql -drop function age_in_years(text, text); -create function age_in_years(today_tz in timestamptz, dob_tz in timestamptz) - returns interval - language plpgsql -as $body$ -declare - d_today constant date not null := today_tz; - d_dob constant date not null := dob_tz; - - yy_today constant int not null := extract(year from d_today); - mm_today constant int not null := extract(month from d_today); - dd_today constant int not null := extract(day from d_today); - - yy_dob constant int not null := extract(year from d_dob); - mm_dob constant int not null := extract(month from d_dob); - dd_dob constant int not null := extract(day from d_dob); - - mm_dd_today constant date not null := make_date(year=>1, month=>mm_today, day=>dd_today); - mm_dd_dob constant date not null := make_date(year=>1, month=>mm_dob, day=>dd_dob); - - -- Is today's mm-dd greater than dob's mm-dd? - delta constant int not null := case - when mm_dd_today >= mm_dd_dob then 0 - else -1 - end; - age constant interval not null := make_interval(years=>(yy_today - yy_dob + delta)); -begin - return age; -end; -$body$; - -set timezone = 'America/Los_Angeles'; -select - age_in_years('2007-02-13', '1984-02-14')::text as "age one day before birthday", - age_in_years('2007-02-14', '1984-02-14')::text as "age on birthday", - age_in_years('2007-02-15', '1984-02-14')::text as "age one day after birthday", - age_in_years(clock_timestamp(), '1984-02-14')::text as "age right now"; -``` - -This is the result (when the _select_ is executed in October 2021): - -```output - age one day before birthday | age on birthday | age one day after birthday | age right now ------------------------------+-----------------+----------------------------+--------------- - 22 years | 23 years | 23 years | 37 years -``` - -You can easily derive the function _age_in_months()_ from the function _age_in_years()_. Then, with all three functions in place, _age_in_days()_, _age_in_months()_, and _age_in_years()_, you can implement an _age()_ function that applies a rule-of-thumb, based on threshold values for what _age_in_days()_ returns, to return either a pure days, a pure months, or a pure years _interval_ value. This is left as an exercise for the reader. - -### The semantics of the built-in function age() - -{{< note title="The following account relies on understanding the internal representation of an 'interval' value" >}} -The internal representation of an _interval_ value is a _[mm, dd, ss]_ tuple. This is explained in the section [How does YSQL represent an _interval_ value?](../../date-time-data-types-semantics/type-interval/interval-representation/). 
{{< /note >}}

Bare _timestamp_ subtraction produces a result where the _mm_ field is always _zero_ and only the _dd_ and _ss_ fields might be non-zero, thus:

```plpgsql
select (
    '2001-04-10 12:43:17'::timestamp -
    '1957-06-13 11:41:13'::timestamp)::text;
```

This is the result:

```output
 16007 days 01:02:04
```

See the section [The moment-moment overloads of the "-" operator for _timestamptz_, _timestamp_, and _time_](../../date-time-data-types-semantics/type-interval/interval-arithmetic/moment-moment-overloads-of-minus/) for more information.

The PostgreSQL documentation, in [Table 9.30. Date/Time Functions](https://www.postgresql.org/docs/15/functions-datetime.html#FUNCTIONS-DATETIME-TABLE), describes how _age()_ calculates its result thus:

> Subtract arguments, producing a "symbolic" result that uses years and months, rather than just days

and it gives this example:

```plpgsql
select age(
    '2001-04-10'::timestamp,
    '1957-06-13'::timestamp)::text;
```

with this result:

```output
43 years 9 mons 27 days
```

Because the result data type is _interval_, and there's no such thing as a "symbolic" _interval_ value, this description is simply nonsense. It presumably means that the result is a hybrid _interval_ value where the _mm_ field might be non-zero.

{{< note title="'age(ts2, ts1)' versus 'justify_interval(ts2 - ts1)'" >}}
While, as was shown above, subtracting one _timestamp[tz]_ value from another produces an _interval_ value whose _mm_ component is always _zero_, you can use _justify_interval()_ to produce a value that, in general, has a _non-zero_ value for each of the _mm_, _dd_, and _ss_ components. However, the actual value produced by doing this will, in general, differ from that produced by invoking _age()_, even when the results are compared with the native equals operator, `=` (and not the [user-defined "strict equals"](../../date-time-data-types-semantics/type-interval/interval-utilities/#the-user-defined-strict-equals-interval-interval-operator) operator, `==`). Try this:

```plpgsql
set timezone = 'UTC';
with
  c1 as (
    select
      '2021-03-17 13:43:19 America/Los_Angeles'::timestamptz as ts2,
      '2000-05-19 11:19:13 America/Los_Angeles'::timestamptz as ts1),
  c2 as (
    select
      age(ts2, ts1)               as a,
      justify_interval(ts2 - ts1) as j
    from c1)
select
  a::text as "age(ts2, ts1)",
  j::text as "justify_interval(ts2 - ts1)",
  (a = j)::text as "age() = justify_interval() using native equals"
from c2;
```

This is the result:

```output
          age(ts2, ts1)           |   justify_interval(ts2 - ts1)   | age() = justify_interval() using native equals
----------------------------------+---------------------------------+------------------------------------------------
 20 years 9 mons 29 days 02:24:06 | 21 years 1 mon 17 days 02:24:06 | false
```

They differ simply because _justify_interval()_ uses one rule (see the subsection [The _justify_hours()_, _justify_days()_, and _justify_interval()_ built-in functions](../../date-time-data-types-semantics/type-interval/justfy-and-extract-epoch/#the-justify-hours-justify-days-and-justify-interval-built-in-functions)) and _age()_ uses a different rule (see the subsection [The semantics of the two-parameter overload of function _age()_](./age/#the-semantics-of-the-two-parameter-overload-of-function-age-timestamp-tz-timestamp-tz)). You should understand the rule that each uses and then decide what you need.
But notice [Yugabyte's recommendation](#avoid-using-age), below, simply to avoid using the built-in _age()_ function.
{{< /note >}}

Anyway, the phrase _producing a "symbolic" result_ gives no clue about how _age()_ works in the general case. But it looks like this is what it did with the example above:

- It tried to subtract _"13 days"_ from _"10 days"_ and "borrowed" one month to produce a positive result. As it happens, both June and April have 30 days (with no leap year variation). The result, _"(30 + 10) - 13"_, is _"27 days"_.

- It tried to subtract _"6 months"_ from _"3 months"_ (decremented by one month from its starting value, _"4 months"_, to account for the "borrowed" month), and "borrowed" one year to produce a positive result. One year is always twelve months. The result, _"(12 + 3) - 6"_, is _"9 months"_.

- Finally, it subtracted _"1957 years"_ from _"2000 years"_ (decremented by one year from its starting value, _"2001 years"_, to account for the "borrowed" year).

Here is another example of the result that _age()_ produces when the inputs have non-zero time-of-day components:

```plpgsql
select age(
    '2001-04-10 11:19:17'::timestamp,
    '1957-06-13 15:31:42'::timestamp)::text;
```

with this result:

```output
43 years 9 mons 26 days 19:47:35
```

Nobody ever cites an age like this, with hours, minutes, and seconds components. But the PostgreSQL designers thought that it was a good idea to implement _age()_ to do this.

Briefly, and approximately, the function _age()_ extracts the _year_, _month_, _day_, and _seconds_ since midnight for each of the two input moment values. It then subtracts these values pairwise and uses them to create an _interval_ value. In general, this will be a hybrid value with non-zero _mm_, _dd_, and _ss_ components. But the statement of the semantics must be made more carefully than this to accommodate the fact that the outcomes of the pairwise differences might be negative.

- For example, if today is _"year 2020 month 4"_ and if the date-of-birth is _"year 2010 month 6"_, then a naïve application of this rule would produce an age of _"10 years -2 months"_. But age is never stated like this. Rather, it's stated as _"9 years 10 months"_. This is rather like doing subtraction of distances measured in imperial feet and inches. When you subtract _"10 feet 6 inches"_ from _"20 feet 4 inches"_, the naïve difference is _"10 feet -2 inches"_; you "borrow" one foot, taking the _"10 feet"_ down to _"9 feet"_, so that you can subtract _"6 inches"_ from _"12 + 4 inches"_ to get _"10 inches"_, and so the answer is _"9 feet 10 inches"_.

However, the borrowing rules get very tricky with dates because "borrowed" months (when pairwise subtraction of _day_ values would produce a negative result) have different numbers of days (and there are leap years to account for too). The rules are so baroque that it's impractical to explain the semantics of _age()_ in prose. Rather, you need to model the implementation. PL/pgSQL is perfect for this.

The full account of _age()_ is presented on its own [dedicated child page](./age/).

{{< tip title="Avoid using the built-in 'age()' function." >}}
The rule that _age()_ uses to produce its result cannot be expressed clearly in prose. And, anyway, it produces a result with an entirely inappropriate apparent precision.
Yugabyte recommends that you decide how you want to define age for your present use case and then implement the definition that you choose along the lines used in the user-defined functions _age_in_days()_ and _age_in_years()_ shown above in the subsection [The definition of age is a matter of convention](#the-definition-of-age-is-a-matter-of-convention).
{{< /tip >}}

## function extract() \| function date_part() returns double precision

The function _extract()_, and the alternative syntax that the function _date_part()_ supports for the same semantics, return a _double precision_ value corresponding to a nominated so-called _field_, like _year_ or _second_, from the input _date-time_ value.

The full account of _extract()_ and _date_part()_ is presented on its own [dedicated child page](./extract).

## function timezone() \| 'at time zone' operator returns timestamp \| timestamptz

The function _timezone()_, and the alternative syntax that the _at time zone_ operator supports for the same semantics, return a plain _timestamp_ value from a _timestamptz_ input or a _timestamptz_ value from a plain _timestamp_ input. The effect is the same as if a simple typecast is used from one data type to the other after using _set timezone_ to specify the required timezone.

```output
timezone(<UTC offset>, timestamp[tz]_value) == timestamp[tz]_value at time zone <UTC offset>
```

Try this example:

```plpgsql
with c as (
  select '2021-09-22 13:17:53.123456 Europe/Helsinki'::timestamptz as tstz)
select
  (timezone('UTC', tstz)           = tstz at time zone 'UTC'           )::text as "with timezone given as text",
  (timezone(make_interval(), tstz) = tstz at time zone make_interval() )::text as "with timezone given as interval"
from c;
```

This is the result:

```output
 with timezone given as text | with timezone given as interval
-----------------------------+---------------------------------
 true                        | true
```

(Because all of _make_interval()_'s formal parameters have default values of _zero_, you can invoke it with no actual arguments.)

Now try this example:

```plpgsql
set timezone = 'UTC';
with c as (
  select '2021-09-22 13:17:53.123456 Europe/Helsinki'::timestamptz as tstz)
select
  (timezone('UTC', tstz) = tstz::timestamp)::text
from c;
```

The result is _true_.

The function syntax is more expressive than the operator syntax because its overloads distinguish explicitly between specifying the timezone by name or as an _interval_ value. Here is the interesting part of the output from \\_df timezone()_. The rows were re-ordered manually and whitespace was manually added to improve the readability:

```output
      Result data type       |          Argument data types
-----------------------------+---------------------------------------
 timestamp with time zone    | text, timestamp without time zone
 timestamp without time zone | text, timestamp with time zone

 timestamp with time zone    | interval, timestamp without time zone
 timestamp without time zone | interval, timestamp with time zone
```

The rows for the _timetz_ argument data types were removed manually, respecting the recommendation [here](../../../type_datetime/#avoid-timetz) to avoid using this data type. (You can't get \\_df_ output for the operator _at time zone_.)

{{< tip title="Avoid using the 'at time zone' operator and use only the function 'timezone()'." >}}
Because the function syntax is more expressive than the operator syntax, Yugabyte recommends using only the former syntax.
Moreover, never invoke _timezone()_ directly. Rather, use it only via the overloads of the user-defined wrapper function _at_timezone()_, as described in the section [Recommended practice for specifying the UTC offset](../../../type_datetime/timezones/recommendation/).
{{< /tip >}}

## 'overlaps' operator returns boolean

The account of the _overlaps_ operator first explains the semantics in prose and pictures. Then it presents two implementations that model the semantics and shows that they produce the same results.

### 'overlaps' semantics in prose

The _overlaps_ operator determines whether two durations have any moments in common. The _overlaps_ invocation defines a duration either by its two bounding moments or by one bounding moment and the size of the duration (expressed as an _interval_ value). There are therefore _four_ alternative general invocation syntaxes. Either:

```output
overlaps_result ◄— (left-duration-bound-1, left-duration-bound-2) overlaps (right-duration-bound-1, right-duration-bound-2)
```

or:

```output
overlaps_result ◄— (left-duration-bound-1, left-duration-size) overlaps (right-duration-bound-1, right-duration-bound-2)
```

or:

```output
overlaps_result ◄— (left-duration-bound-1, left-duration-bound-2) overlaps (right-duration-bound-1, right-duration-size)
```

or:

```output
overlaps_result ◄— (left-duration-bound-1, left-duration-size) overlaps (right-duration-bound-1, right-duration-size)
```

Unlike other phenomena that have a length, date-time durations are special because time flows inexorably _from_ earlier moments _to_ later moments. It's convenient to say that, when the invocation as presented has been processed, a duration is ultimately defined by its start moment and its finish moment—even if one of these is derived from the other by the size of the duration. In the degenerate case, where the start and finish moments coincide, the duration becomes an instant.

Notice that, while it's natural to write the start moment before the finish moment, the result is insensitive to the order of the boundary moments or to the sign of the size of the duration. The result is also insensitive to which duration, "left" or "right", is written first.

This prose account of the semantics starts with some simple examples. Then it states the rules carefully and examines critical edge cases.

#### Simple examples
- -Here's a simple positive example: - -```plpgsql -select ( - ('07:00:00'::time, '09:00:00'::time) overlaps - ('08:00:00'::time, '10:00:00'::time) - )::text as "time durations overlap"; -``` - -This is the result: - -```output - time durations overlap ------------------------- - true -``` - -And here are some invocation variants that express durations with the same ultimate derived start and finish moments: - -```plpgsql -do $body$ -declare - seven constant time not null := '07:00:00'; - eight constant time not null := '08:00:00'; - nine constant time not null := '09:00:00'; - ten constant time not null := '10:00:00'; - two_hours constant interval not null := make_interval(hours=>2); - - r1 constant boolean not null := (seven, nine) overlaps (eight, ten); - r2 constant boolean not null := (seven, two_hours) overlaps (eight, ten); - r3 constant boolean not null := (seven, nine) overlaps (eight, two_hours); - r4 constant boolean not null := (seven, two_hours) overlaps (eight, two_hours); - - r5 constant boolean not null := (nine, seven) overlaps (ten, eight); - r6 constant boolean not null := (nine, -two_hours) overlaps (ten, -two_hours); -begin - assert ((r1 = r2) and (r1 = r3) and (r1 = r4) and (r1 = r5) and (r1 = r6)), 'Assert failed'; -end; -$body$; -``` - -The block finishes silently, showing that the result from each of the six variants is the same. - -The operator is supported by the _overlaps()_ function. Here is the interesting part of the output from \\_df overlaps()_: - -```output - Result data type | Argument data types -------------------+-------------------------------------------------------------------------------------------------------------------- - boolean | time, time, time, time - boolean | time, interval, time, time - boolean | time, time, time, interval - boolean | time, interval, time, interval - - boolean | timestamp, timestamp, timestamp, timestamp - boolean | timestamp, interval, timestamp, timestamp - boolean | timestamp, timestamp, timestamp, interval - boolean | timestamp, interval, timestamp, interval - - boolean | timestamptz, timestamptz, timestamptz, timestamptz - boolean | timestamptz, interval, timestamptz, timestamptz - boolean | timestamptz, timestamptz, timestamptz, interval - boolean | timestamptz, interval, timestamptz, interval -``` - -The rows for the _timetz_ argument data types were removed manually, respecting the recommendation [here](../../../type_datetime/#avoid-timetz) to avoid using this data type. Also, to improve the readability: - -- the rows were reordered -- _time without time zone_ was rewritten as _time_, -- _timestamp without time zone_ was rewritten as _timestamp_, -- _timestamp with time zone_ was rewritten as _timestamptz_, -- blank rows and spaces were inserted manually - -This boils down to saying that _overlaps_ supports durations whose boundary moments are one of _time_, plain _timestamp_, or _timestamptz_. There is no support for _date_ durations. But you can achieve the functionality that such support would bring simply by typecasting _date_ values to plain _timestamp_ values and using the plain _timestamp_ overload. If you do this, avoid the overloads with an _interval_ argument because of the risk that a badly-chosen _interval_ value will result in a boundary moment with a non-zero time component. Rather, achieve that effect by adding an _integer_ value to a _date_ value _before_ typecasting to plain _timestamp_. 
- -Here is an example: - -```plpgsql -select ( - ( ('2020-01-01'::date)::timestamp, ('2020-01-01'::date + 2)::timestamp ) overlaps - ( ('2020-01-02'::date)::timestamp, ('2020-01-01'::date + 2)::timestamp ) - )::text as "date durations overlap"; -``` - -This is the result: - -```output - date durations overlap ------------------------- - true -``` - -#### Rule statement and edge cases - -Because (unless the duration collapses to an instant) one of the boundary moments will inevitably be earlier than the other, it's useful to assume that some pre-processing has been done and to write the general invocation syntax using the vocabulary _start-moment_ and _finish-moment_. Moreover (except when both durations start at the identical moment and finish at the identical moment), it's always possible to decide which is the earlier-duration and which is the later-duration. Otherwise (when the two durations exactly coincide), it doesn't matter which is labeled earlier and which is labeled later. - -- If the left-duration's start-moment is less than the right-duration's start-moment, then the left-duration is the _earlier-duration_ and the right-duration is the _later-duration_. - -- If the right-duration's start-moment is less than the left-duration's start-moment, then the right-duration is the _earlier-duration_ and the left-duration is the _later-duration_. - -- Else, if the left-duration's start-moment and the right-duration's start-moment are identical, then - - - If the left-duration's finish-moment is less than the right-duration's finish-moment, then the left-duration is the _earlier-duration_ and the right-duration is the _later-duration_. - - - If the right-duration's finish-moment is less than the left-duration's finish-moment, then the right-duration is the _earlier-duration_ and the left-duration is the _later-duration_. - -It's most useful, in order to express the rules and to discuss the edge cases, to write the general invocation syntax using the vocabulary _earlier-duration_ and _later-duration_ together with _start-moment_ and _finish-moment_, thus: - -```output -overlaps_result ◄— (earlier-duration-start-moment, earlier-duration-finish-moment) overlaps (later-duration-start-moment, later-duration-finish-moment) -``` - -The _overlaps_ operator treats a duration as a closed-open range. In other words: - -```output -duration == [start-moment, finish-moment) -``` - -However, even when a duration collapses to an instant, it is considered to be non-empty. (When the end-points of a `'[)'` _range_ value are identical, this value _is_ considered to be empty and cannot overlap with any other range value.) - -Because the _start-moment_ is included in the duration but the _finish-moment_ is not, this leads to the requirement to state the following edge case rules. (These rules were established by the SQL Standard.) - -- If the left duration is not collapsed to an instant, and the _left-duration-finish-moment_ is identical to the _right-duration-start-moment_, then the two durations _do not_ overlap. This holds both when the right duration is not collapsed to an instant and when it is so collapsed. -- If the left duration is collapsed to an instant, and the _left-duration-start-and-finish-moment_ is identical to the _right-duration-start-moment_, then the two durations _do_ overlap. This holds both when the right duration is not collapsed to an instant and when it is so collapsed. In other words, when two instants coincide, they _do_ overlap. 
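These two edge-case rules are easy to check directly. Here is a minimal sketch (the _time_ values and the column aliases are arbitrary choices, made just for this illustration):

```plpgsql
select
  -- Left duration not collapsed; its finish moment equals the right duration's start moment.
  ( ('07:00'::time, '08:00'::time) overlaps ('08:00'::time, '09:00'::time) )::text as "finish = start",

  -- Left duration collapsed to an instant that coincides with the right duration's start moment.
  ( ('08:00'::time, '08:00'::time) overlaps ('08:00'::time, '09:00'::time) )::text as "instant = start",

  -- Both durations collapsed to the same instant.
  ( ('08:00'::time, '08:00'::time) overlaps ('08:00'::time, '08:00'::time) )::text as "coinciding instants";
```

This is the expected result:

```output
 finish = start | instant = start | coinciding instants
----------------+-----------------+---------------------
 false          | true            | true
```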
Notice that these rules are different from those for the `&&` operator between a pair of `'[)'` _range_ values. (The `&&` operator is also referred to as the _overlaps_ operator for _range_ values.) The differences are seen, in some cases, when instants are involved. Try this:

```plpgsql
with
  c1 as (
    select '2000-01-01 12:00:00'::timestamp as the_instant),
  c2 as (
    select
      the_instant,
      tsrange(the_instant, the_instant, '[)') as instant_range -- notice '[)'
    from c1)
select
  the_instant,
  isempty(instant_range) ::text as "is empty",
  ( (the_instant, the_instant) overlaps (the_instant, the_instant) )::text as "overlaps",
  ( instant_range && instant_range )::text as "&&"
from c2;
```

This is the result:

```output
     the_instant     | is empty | overlaps |  &&
---------------------+----------+----------+-------
 2000-01-01 12:00:00 | true     | true     | false
```

In order to get the outcome _true_ from the `&&` operator, you have to change the definition of the ranges from closed-open, `'[)'`, to closed-closed, `'[]'`, thus:

```plpgsql
with
  c1 as (
    select '2000-01-01 12:00:00'::timestamp as the_instant),
  c2 as (
    select
      the_instant,
      tsrange(the_instant, the_instant, '[]') as instant_range -- notice '[]'
    from c1)
select
  the_instant,
  isempty(instant_range) ::text as "is empty",
  ( (the_instant, the_instant) overlaps (the_instant, the_instant) )::text as "overlaps",
  ( instant_range && instant_range )::text as "&&"
from c2;
```

This is the new result:

```output
     the_instant     | is empty | overlaps |  &&
---------------------+----------+----------+------
 2000-01-01 12:00:00 | false    | true     | true
```

It doesn't help to ask why the rules are different for the _overlaps_ operator acting between two explicitly specified durations and the `&&` operator acting between two _range_ values. It simply is what it is—and the rules won't change.

Notice that you _can_ make the outcomes of the _overlaps_ operator and the `&&` operator agree for all tests. But to get this outcome, you must surround the use of `&&` with some if-then-else logic to choose when to use `'[)'` and when to use `'[]'`. Code that does this is presented on this [dedicated child page](./overlaps/).

### 'overlaps' semantics in pictures

The following diagram shows all the interesting cases.

![overlaps-tests](/images/api/ysql/type_datetime/overlaps-semantics.jpg)

### Two implementations that model the 'overlaps' semantics and that produce the same results

These are presented and explained on this [dedicated child page](./overlaps/). The page also presents the tests that show that, for each set of inputs that jointly probe all the interesting cases, the two model implementations produce the same result as each other and the same result as the native _overlaps_ operator, thus:

```output
  TWO FINITE DURATIONS
  --------------------

  1. Durations do not overlap                 2000-01-15 00:00:00, 2000-05-15 00:00:00 | 2000-08-15 00:00:00, 2000-12-15 00:00:00                false
  2. Right start = left end                   2000-01-15 00:00:00, 2000-05-15 00:00:00 | 2000-05-15 00:00:00, 2000-12-15 00:00:00                false
  3. Durations overlap                        2000-01-15 00:00:00, 2000-08-15 00:00:00 | 2000-05-15 00:00:00, 2000-12-15 00:00:00                true
  3. Durations overlap by 1 microsec          2000-01-15 00:00:00, 2000-06-15 00:00:00.000001 | 2000-06-15 00:00:00, 2000-12-15 00:00:00         true
  3. Durations overlap by 1 microsec          2000-06-15 00:00:00, 2000-12-15 00:00:00 | 2000-01-15 00:00:00, 2000-06-15 00:00:00.000001         true
  4.
Contained 2000-01-15 00:00:00, 2000-12-15 00:00:00 | 2000-05-15 00:00:00, 2000-08-15 00:00:00 true - 4. Contained, co-inciding at left 2000-01-15 00:00:00, 2000-06-15 00:00:00 | 2000-01-15 00:00:00, 2000-08-15 00:00:00 true - 4. Contained, co-inciding at right 2000-01-15 00:00:00, 2000-06-15 00:00:00 | 2000-02-15 00:00:00, 2000-06-15 00:00:00 true - 4. Durations coincide 2000-01-15 00:00:00, 2000-06-15 00:00:00 | 2000-01-15 00:00:00, 2000-06-15 00:00:00 true - - ONE INSTANT, ONE FINITE DURATION - -------------------------------- - - 5. Instant before duration 2000-02-15 00:00:00, 2000-02-15 00:00:00 | 2000-03-15 00:00:00, 2000-04-15 00:00:00 false - 6. Instant coincides with duration start 2000-02-15 00:00:00, 2000-02-15 00:00:00 | 2000-02-15 00:00:00, 2000-03-15 00:00:00 true - 7. Instant within duration 2000-02-15 00:00:00, 2000-02-15 00:00:00 | 2000-01-15 00:00:00, 2000-03-15 00:00:00 true - 8. Instant coincides with duration end 2000-02-15 00:00:00, 2000-02-15 00:00:00 | 2000-01-15 00:00:00, 2000-02-15 00:00:00 false - 9. Instant after duration 2000-05-15 00:00:00, 2000-05-15 00:00:00 | 2000-03-15 00:00:00, 2000-04-15 00:00:00 false - - TWO INSTANTS - ------------ - - 10. Instants differ 2000-01-15 00:00:00, 2000-01-15 00:00:00 | 2000-06-15 00:00:00, 2000-06-15 00:00:00 false - 11. Instants coincide 2000-01-15 00:00:00, 2000-01-15 00:00:00 | 2000-01-15 00:00:00, 2000-01-15 00:00:00 true -``` diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/operators/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/operators/_index.md deleted file mode 100644 index 0d11b5878406..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/operators/_index.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -title: Date and time operators [YSQL] -headerTitle: Date and time operators -linkTitle: Operators -description: Describes the date and time operators. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: date-time-operators - parent: api-ysql-datatypes-datetime - weight: 80 -type: indexpage ---- - -Each of the comparison operators, `<`, `<=`, `=`, `>=`, `>`, and `<>`, each of the arithmetic operators, `+`, `-`, `*`, and `/`, and, of course, the `::` typecast operator has one or several overloads whose two operands are among the _date_, _time_, plain _timestamp_, _timestamptz_, and _interval_ data types. The [parent section](../../type_datetime/) explains why _timetz_ is not covered in this overall _date-time_ section. This is why there are _five_ interesting _date-time_ data types. - -The section [Typecasting between values of different date-time data types](../typecasting-between-date-time-values/) shows which of the twenty potential typecasts between pairs of _different_ interesting date-time data types are legal. (It's meaningless to think about typecasting between a pair of values of the _same_ data type.) And for each of those _ten_ that is legal, the section describes its semantics. Further, the typecast operator, `::`, has an overload from/to each of the five listed interesting _date-time_ data types to/from _text_. In other words, there are _ten_ typecasts from/to the interesting _date-time_ data types to/from _text_. The section [Typecasting between date-time values and text values](../typecasting-between-date-time-and-text/) describes these. 
The following tables show, for the comparison operators jointly, for each of the addition and subtraction operators separately, and for the multiplication and division operators jointly, which of the twenty-five nominally definable operand pairs are legal. Links are given to the sections that describe the semantics.

{{< tip title="Always write the typecast explicitly." >}}
Some of the expressions that use the binary operators that this section covers are legal where you might expect them not to be. The reason is that, as the section [Typecasting between values of different date-time data types](../typecasting-between-date-time-values/) shows, typecasts, and corresponding implicit conversions, are defined between the data types of operand pairs where you might not expect them. Yugabyte recommends that you consider very carefully what your intention is when you take advantage of such conversions between values of different data types.

There are two reasons for writing the typecast explicitly:

- You advertise to the reader that typecasting is being done and that they need to understand the typecast semantics.

- You specify explicitly whether the typecast is to be done on the left operand's value to the right operand's data type, or _vice versa_, rather than having you, and other readers of your code, rely on remembering what the default behavior is.
{{< /tip >}}

## Overloads of the comparison operators

All of the comparison operators, `<`, `<=`, `=`, `>=`, `>`, and `<>`, are legal for pairs of values of the same _date-time_ data type. The semantics of comparing two moment values is straightforward because a moment value is a scalar. The section [How does YSQL represent an interval value?](../date-time-data-types-semantics/type-interval/interval-representation/) explains that an interval value is actually a three-component _[mm, dd, ss]_ tuple. The semantics of _interval-interval_ comparison, therefore, needs careful definition—and the section [Comparing two interval values](../date-time-data-types-semantics/type-interval/interval-arithmetic/interval-interval-comparison/) does this. Yugabyte recommends that you avoid the complexity that the non-scalar nature of _interval_ values brings by adopting the approach that the section [Custom domain types for specializing the native interval functionality](../date-time-data-types-semantics/type-interval/custom-interval-domains/) describes. (It shows you how to define three _interval_ flavors, pure months, pure days, and pure hours, so that their values are effectively scalars.)

When the data types differ, the comparison is sometimes simply illegal. The attempt then causes this error:

```output
42883: operator does not exist...
```

Otherwise, even though the data types differ, the comparison is legal. The section [Test the date-time comparison overloads](./test-date-time-comparison-overloads/) presents code that tests all of the comparison overloads whose syntax you can write. This table summarizes the outcomes. An empty cell means that the overload is illegal.
- -| | | | | | | -| ---------------------------- | ---------| --------------| --------------------| ----------------| -------------| -| _left operand\right operand_ | **DATE** | **TIME** | **PLAIN TIMESTAMP** | **TIMESTAMPTZ** | **INTERVAL** | -| **DATE** | **ok** | | ok | ok | | -| **TIME** | | **ok** | | | ok | -| **PLAIN TIMESTAMP** | ok | [Note](#note) | **ok** | ok | | -| **TIMESTAMPTZ** | ok | [Note](#note) | ok | **ok** | | -| **INTERVAL** | | ok | | | **ok** | - -If a comparison is legal between values of two different data types, then (with the caveat that the immediately following note states) it's legal _both_ when values of the two data types are used, respectively, as the left and right operands _and_ when they're used as the right and left operands. In all of these cases, the mutual typecast between values of the pair of data types is legal in each direction. - -**Note:** In just _two_ cases, the comparison is _illegal_ between values of a pair of data types where the typecast _is_ legal. The table calls out these cases. In both these cases, the typecast operator is legal _from_ the left operand _to_ the right operand, but _not vice versa_. - -If you think that it makes sense, then you can execute the comparison by writing the explicit typecast. Try this: - -```plpgsql -set timezone = 'UTC'; -with c as ( - select - '13:00:00' ::time as t, - '02-01-2020 12:30:00' ::timestamp as ts, - '02-01-2020 13:30:00 UTC' ::timestamptz as tstz - ) -select - (ts ::time > t)::text as "ts > t", - (tstz::time > t)::text as "tstz > t" -from c; -``` - -It runs without error and produces these two values: - -```output - ts > t | tstz > t ---------+---------- - false | true -``` - -The subsections [plain _timestamp_ to _time_](../typecasting-between-date-time-values#plain-timestamp-to-time) and [timestamptz to time](../typecasting-between-date-time-values#timestamptz-to-time), in the section [Typecasting between values of different date-time data types](../typecasting-between-date-time-values/), explain the semantics of these two typecasts. The outcomes here, _false_ and _true_, are consistent with those explanations. - -In summary, comparison makes obvious sense only when the data types of the left and right operands are the same. These comparisons correspond to the on-diagonal cells. If you have worked out that it makes sense to use any of the comparison operations that the table above shows with _ok_ in an off-diagonal cell, then, for clarity, you should write the explicit typecast that you intend. - -## Overloads of the addition and subtraction operators - -The model for the intended, and therefore useful, functionality is clear and simple: - -- Subtraction between a pair of _date_ values produces an _integer_ value. Correspondingly, adding or subtracting an _integer_ value to/from a _date_ value produces a _date_ value. (Early in PostgreSQL's history, _date_ was the only _date-time_ data type. Because _interval_ was not yet available, it was natural that the difference between two _date_ values would be an _integer_ value.) -- Subtraction between a pair of values of the newer data types _time_, plain _timestamp_, or _timestamptz_, produces an _interval_ value. Correspondingly, adding or subtracting an _interval_ value to/from a _time_, plain _timestamp_, or _timestamptz_ value produces, respectively, a _time_, plain _timestamp_, or _timestamptz_ value. -- Adding two _interval_ values or subtracting one _interval_ value from another produces an _interval_ value. 
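Before looking at the tables, you can sanity-check this model with _pg_typeof()_. Here is a minimal sketch (the literal values are arbitrary choices, made just for this illustration):

```plpgsql
select
  pg_typeof('2020-01-06'::date      - '2020-01-01'::date)      as "date - date",
  pg_typeof('2020-01-06'::timestamp - '2020-01-01'::timestamp) as "timestamp - timestamp",
  pg_typeof('2020-01-01'::timestamp + make_interval(days=>5))  as "timestamp + interval",
  pg_typeof(make_interval(days=>5)  + make_interval(hours=>3)) as "interval + interval";
```

The reported data types are _integer_, _interval_, _timestamp without time zone_, and _interval_, just as the three bullets above describe.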
- -The data types that these useful operations produce are shown in **bold** in the cells in the tables in the sections [Overloads of the addition operator](#overloads-of-the-addition-operator) and [Overloads of the subtraction operator](#overloads-of-the-subtraction-operator). The resulting data types in any other non-empty cells in these tables are shown in regular font—and Yugabyte recommends that you avoid using the operations that they denote in application code. (If you are sure that an operation denoted by a regular font cell makes sense in your present use case, then you should write the implied typecast explicitly.) - -- The section [The moment-interval overloads of the "+" and "-" operators for _timestamptz_, _timestamp_, and _time_](../date-time-data-types-semantics/type-interval/interval-arithmetic/moment-interval-overloads-of-plus-and-minus/) explains the semantics here. Because an _interval_ value is a (non-scalar) _[\[mm, dd, ss\]](../date-time-data-types-semantics/type-interval/interval-representation/)_ tuple, the rules are quite complicated. -- The section [Adding or subtracting a pair of _interval_ values](../date-time-data-types-semantics/type-interval/interval-arithmetic/interval-interval-addition/) explains the semantics here. The rules here, too, are subtle—again, because an _interval_ value is an _[\[mm, dd, ss\]](../date-time-data-types-semantics/type-interval/interval-representation/)_ tuple. - -### Overloads of the addition operator - -The section [Test the date-time addition overloads](./test-date-time-addition-overloads/) presents code that tests all of the addition operator overloads whose syntax you can write. This table summarizes the outcomes. An empty cell means that the overload is illegal. - -| | | | | | | -| ---------------------------- | ----------------| ----------------| --------------------| ----------------| --------------------| -| _left operand\right operand_ | **DATE** | **TIME** | **PLAIN TIMESTAMP** | **TIMESTAMPTZ** | **INTERVAL** | -| **DATE** | | plain timestamp | | | plain timestamp | -| **TIME** | plain timestamp | | plain timestamp | timestamptz | **time** | -| **PLAIN TIMESTAMP** | | plain timestamp | | | **plain timestamp** | -| **TIMESTAMPTZ** | | timestamptz | | | **timestamptz** | -| **INTERVAL** | plain timestamp | **time** | **plain timestamp** | **timestamptz** | **interval** | - -Notice that the table is symmetrical about the top-left to bottom-right diagonal. This is to be expected because addition is commutative. - -When the resulting data type is rendered in **bold** font, this indicates that the operation makes intrinsic sense. In three of these cases, the operation has a moment value as one of the arguments and an _interval_ value as the other. (The Yugabyte documentation refers to values of the _date_, _time_, plain _timestamp_ or _timestamptz_ data types as _moments_.) In the fourth case, both arguments are _interval_ values. - -The _date-time_ data types are unusual with respect to addition, at least with respect to the conceptual proposition, in that you can't add two values of the same moment data type. The on-diagonal cell for each of these four data types is empty. (As mentioned, you _can_ add a pair of _interval_ values.) - -The outcomes for the _"date_value + interval_value"_ cell, and the converse _"interval_value + date_value"_ cell, might surprise you. This is explained by the fact that, uniquely for subtraction between moment values, subtracting one _date_ value from another produces an _integer_ value. 
Try this: - -```plpgsql -select - pg_typeof('2020-01-06'::date - '2020-01-01'::date) as "data type", - '2020-01-06'::date - '2020-01-01'::date as "result"; -``` - -This is the result: - -```output - data type | result ------------+-------- - integer | 5 -``` - -The inverse of this, then, is that adding an _integer_ value to a _date_ value produces a _date_ value. Try this: - -```plpgsql -select - pg_typeof('2020-01-01'::date + 5::integer) as "data type", - '2020-01-01'::date + 5::integer as "result"; -``` - -This is the result: - -```output - data type | result ------------+------------ - date | 2020-01-06 -``` - -As mentioned above, this departure, by _date_, from the pattern that the other moment data types follow reflects PostgreSQL's history. Adding a sixth column and a sixth row for _integer_ to the table would clutter it unnecessarily because only the _date-integer_ and _integer-date_ cells in the new column and the new row would be non-empty. - -The other outcomes are intuitively clear. What could it mean to add three o'clock to five o'clock? However, the clear conceptual proposition is compromised because, as the sections [time to interval](../typecasting-between-date-time-values/#time-to-interval) and [interval to time](../typecasting-between-date-time-values/#interval-to-time) (on the [Typecasting between values of different _date-time_ data types](../typecasting-between-date-time-values/) page) show, _time_ values can be implicitly converted to _interval_ values, and vice-versa. - -This explains the non-empty cells in the _time_ row and the _time_ column that are rendered in normal font. - -Try this: - -```plpgsql -do $body$ -declare - t0 constant time not null := '12:00:00'; - i0 constant interval not null := '12:00:00'; - - t constant time not null := i0; - i constant interval not null := t0; -begin - assert t = t0, 'interval > time failed.'; - assert i = i0, 'time > interval failed.'; -end; -$body$; -``` - -The block finishes silently, showing that both assertions hold. Notice the practice recommendation above. If you work out that you _can_ make use of the semantics of these conversions, you should write the typecasts explicitly rather than rely on implicit conversion. - -**Note**: you might argue that you _can_ give a meaning to the strange notion of adding a pair of _timestamptz_ values like this: - -```plpgsql -drop function if exists sum(timestamptz, timestamptz) cascade; - -create function sum(t1 in timestamptz, t2 in timestamptz) - returns timestamptz - stable - language plpgsql -as $body$ -declare - e1 constant double precision not null := extract(epoch from t1); - e2 constant double precision not null := extract(epoch from t2); - result constant timestamptz not null := to_timestamp(e1 + e2); -begin - return result; -end; -$body$; -``` - -Test it like this: - -```plpgsql -set timezone = 'UTC'; -select sum('1970-01-01 01:00:00 UTC', '1970-01-01 02:00:00 UTC')::text; -``` - -This is the result: - -```output - 1970-01-01 03:00:00+00 -``` - -It's exactly what the semantics of _[extract(epoch from timestamptz_value)](../date-time-data-types-semantics/type-timestamp/)_ and _[to_timestamp(double_precision_value)](../formatting-functions/#from-text-to-date-time)_ tell you to expect. You could even create a user-defined operator `+` based on the function _sum(timestamptz_value, timestamptz_value)_ as defined here. But it's hard to see how the semantics brought by this might be generally useful. 
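If you want to see that claim in action, here is a sketch, and nothing more than that, of such a user-defined `+` operator. It assumes that the _sum()_ function shown above has just been created, and it's shown only to make the point; it isn't a recommendation:

```plpgsql
-- A sketch only: this gives "+" a meaning for a pair of timestamptz values by
-- delegating to the sum() function defined above. There is no native "+" operator
-- with this signature, so the definition doesn't clash with an existing one.
create operator + (
  leftarg  = timestamptz,
  rightarg = timestamptz,
  function = sum);

set timezone = 'UTC';
select ('1970-01-01 01:00:00 UTC'::timestamptz + '1970-01-01 02:00:00 UTC'::timestamptz)::text;

-- Remove the operator again so that it can't be used by accident.
drop operator + (timestamptz, timestamptz);
```

The _select_ produces _1970-01-01 03:00:00+00_, just as the direct invocation of _sum()_ does. This only underlines the point: being able to define such an operator doesn't make its semantics generally useful.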
### Overloads of the subtraction operator

The section [Test the date-time subtraction overloads](./test-date-time-subtraction-overloads/) presents code that tests all of the subtraction operator overloads whose syntax you can write. This table summarizes the outcomes. An empty cell means that the overload is illegal.

| | | | | | |
| ---------------------------- | ----------------| ----------------| --------------------| ----------------| --------------------|
| _left operand\right operand_ | **DATE** | **TIME** | **PLAIN TIMESTAMP** | **TIMESTAMPTZ** | **INTERVAL** |
| **DATE** | | plain timestamp | interval | interval | plain timestamp |
| **TIME** | | **interval** | | | **time** |
| **PLAIN TIMESTAMP** | interval | plain timestamp | **interval** | interval | **plain timestamp** |
| **TIMESTAMPTZ** | interval | timestamptz | interval | **interval** | **timestamptz** |
| **INTERVAL** | | interval | | | **interval** |

Notice that, in contrast to the table for the addition operator, this table is _not_ symmetrical about the top-left to bottom-right diagonal, even though subtraction is commutative in this sense:

```output
(a - b) = -(b - a)
```

The asymmetry simply reflects which implicit typecasts, and therefore which operator overloads, happen to be defined. (The _date-date_ cell is left empty even though subtracting one _date_ value from another is legal: as explained above, that operation produces an _integer_ value and not a _date-time_ value.)

When the resulting data type is rendered in **bold** font, this indicates that the operation makes intrinsic sense. You should avoid the operations that the non-empty cells in regular font denote unless you are convinced that a particular one of these makes sense in your present use case. Then, as recommended above, you should write the implied typecast operator explicitly.

Notice that the number of non-empty cells (_seven_ in all) in **bold** font in this table for the overloads of the subtraction operator is the same as the corresponding number in the table for the overloads of the addition operator. This reflects the complementary relationship between addition and subtraction.

However, there are more non-empty cells in regular font (_eleven_ in all) in this table for the overloads of the subtraction operator than there are in the table for the overloads of the addition operator (_eight_ in all). You might think that this is odd. This outcome reflects the complex rules for when implicit typecasting may be invoked.

## Overloads of the multiplication and division operators

The multiplication and division operators are illegal between all possible pairs of values of the _date-time_ data types. This is consistent with the common-sense expectation: what could it mean, for example, to multiply or divide one _timestamp_ value by another? For completeness, the sections [Test the date-time multiplication overloads](./test-date-time-multiplication-overloads/) and [Test the date-time division overloads](./test-date-time-division-overloads/) present code that confirms that this is the case—in other words, that there are no implicit data type conversions, here, to confuse the simple statement of the rule.

Multiplication and division are legal only when you multiply or divide an _interval_ value by a real or integral number. (Again, what could it mean, for example, to multiply or divide a _timestamp_ value by a number?) Moreover, division needs no specific discussion because dividing the _interval_ value _i_ by the number _n_ is the same as multiplying _i_ by _1/n_. The section [Multiplying or dividing an interval value by a number](../date-time-data-types-semantics/type-interval/interval-arithmetic/interval-number-multiplication/) explains the semantics.
This needs careful definition because of the [non-scalar nature of _interval_ values](../date-time-data-types-semantics/type-interval/interval-representation/).

Yugabyte recommends that you avoid the complexity that the non-scalar nature of _interval_ values brings by adopting the approach that the section [Custom domain types for specializing the native interval functionality](../date-time-data-types-semantics/type-interval/custom-interval-domains/) describes.
diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/_index.md
deleted file mode 100644
index 972aa97ad2ab..000000000000
--- a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/_index.md
+++ /dev/null
@@ -1,53 +0,0 @@
---
title: Timezones and UTC offsets [YSQL]
headerTitle: Timezones and UTC offsets
linkTitle: Timezones and UTC offsets
description: Explains everything about timezones and UTC offsets. [YSQL]
image: /images/section_icons/api/subsection.png
menu:
  preview_api:
    identifier: timezones
    parent: api-ysql-datatypes-datetime
    weight: 40
type: indexpage
---

{{< tip title="Understanding the purpose of specifying the timezone depends on understanding the 'timestamp and timestamptz' section." >}}
To understand when, and why, you should specify the timezone (or, more carefully stated, the offset with respect to the _UTC Time Standard_) at which an operation will be performed, you need to understand the _timestamptz_ data type, and converting its values to/from plain _timestamp_ values. See the [plain _timestamp_ and _timestamptz_ data types](../date-time-data-types-semantics/type-timestamp/) section.
{{< /tip >}}

{{< note title="The single word spelling, 'timezone', is used throughout the prose of the YSQL documentation." >}}
The two spellings _"timezone"_, as one word, and _"time zone"_, as two words, both occur in SQL syntax. For example, both _set timezone = 'UTC'_ and _set time zone 'UTC'_ are legal. On the other hand, you can decorate a plain _timestamp_ or a _timestamptz_ value with the _at time zone_ operator—but here spelling it as the single word _"timezone"_ causes an error. In contrast, the name of the run-time parameter, as is used in the invocation of the built-in function, must be spelled as a single word: _current_setting('timezone')_.

Usually, the spelling of both the single word and the two separate words is case insensitive. Exceptionally, the _name_ column in the _pg_settings_ catalog view includes the value _'TimeZone'_. Of course, SQL queries against this view must respect this mixed-case spelling.

The YSQL documentation, in the prose accounts, always spells _"timezone"_ as a single word. And where the SQL syntax allows a choice, it is always spelled as a single word there too.
{{< /note >}}

It's very likely indeed that anybody who has reason to read the YSQL documentation has extensive experience of attending, and arranging, meetings where the participants join remotely from locations spread over the planet. These days, this experience is commonplace, too, for anybody whose network of friends and relatives spans several countries, several states in North America, or the like—and who takes part in virtual social gatherings. Such meetings invariably use an online calendar that allows anyone who consults it to see the dates and times in their own [local time](../conceptual-background/#wall-clock-time-and-local-time).
- -Some of these calendar implementations will use a PostgreSQL or YugabyteDB database. Systems backed by these two databases (or, for that matter, by _any_ SQL database) would doubtless represent events by their starts and ends, using a pair of [_timestamptz_](../date-time-data-types-semantics/type-timestamp/) values—or, maybe by their start and duration, using a _[timestamptz_, _[interval](../date-time-data-types-semantics/type-interval/)]_ value tuple. - -**The _timestamptz_ data type is overwhelmingly to be preferred over plain _timestamp_**. - -This is because, using _timestamptz_, the ability to see an [absolute time](../conceptual-background/#absolute-time-and-the-utc-time-standard) value as a local time in any [timezone](../conceptual-background/#timezones-and-the-offset-from-the-utc-time-standard) of interest is brought simply and declaratively by setting the session's timezone environment variable. - -In other words, the valuable semantic properties of the _timestamptz_ data type are brought by the session _TimeZone_ notion and are inseparable from it. - -In the calendar use case, the setting will, ideally, be to the _real_, and _canonically named_, timezone to which the reader's location belongs. PostgreSQL and YugabyteDB use an implementation of the _[tz database](https://en.wikipedia.org/wiki/Tz_database)_. And the notions _real_ and _canonically named_ come from that. See the section [The _extended_timezone_names_ view](./extended-timezone-names/). - -The _extended_timezone_names_ view joins the _tz database_ data to the _pg_timezone_names_ view. It's critically important to understand how the facts that this catalog view presents relate to those that the _pg_timezone_abbrevs_ catalog view presents. - -**This page has these child pages:** - -- [The _pg_timezone_names_ and _pg_timezone_abbrevs_ catalog views](./catalog-views/) - -- [The _extended_timezone_names_ view](./extended-timezone-names/) - -- [Scenarios that are sensitive to the _UTC offset_ or explicitly to the timezone](./timezone-sensitive-operations/) - -- [Four ways to specify the _UTC offset_](./ways-to-spec-offset/) - -- [Three syntax contexts that use the specification of a _UTC offset_](./syntax-contexts-to-spec-offset/) - -- [Recommended practice for specifying the _UTC offset_](./recommendation/) diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/_index.md deleted file mode 100644 index f0d69b72ea97..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/extended-timezone-names/_index.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: The extended_timezone_names view [YSQL] -headerTitle: The extended_timezone_names view -linkTitle: Extended_timezone_names -description: The extended_timezone_names extends the pg_timezone_names view with extra columns from the tz database. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: extended-timezone-names - parent: timezones - weight: 20 -type: indexpage -showRightNav: true ---- - -{{< tip title="Download and install the date-time utilities code." >}} -The code on this page and its child pages doesn't depend on the _date-time utilities_ code. However, the code that the section [Recommended practice for specifying the _UTC offset_](../recommendation/) describes does depend on the _extended_timezone_names_ view. 
You might also find the views that this page and its child pages describe to be generally useful by letting you use the power of SQL to get the same information that would be rather tedious to get simply by reading the source data that the [List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) presents.

The code-kit creates a table in a PostgreSQL or YugabyteDB database with the data that the _"List of tz database time zones"_ shows. The simplest way to get the data is just to copy-and-paste the table from the browser display into a plain text file. This naïve approach ends up with a file that has the _tab_ character as the field separator—but these separator characters are missing on each line where, in the browser display, the first and maybe next few table cells are empty. There aren't many such rows, and it's easy to fix the missing _tab_ characters by hand. This cleaned-up file is included in the code kit to save you that effort. (There are other ways to get the same data from the Internet, and you may prefer to use one of these.)

Once you have the data in a plain text file, it's easy to use the \\_copy_ meta-command at the _psql_ or _ysqlsh_ prompt. (It uses the _tab_ character as the default column separator.) This stages the copied-and-pasted browser data into a table. It turns out that the resulting table content has the character `−` (i.e. _chr(8722)_) in place of the regular `-` character (i.e. _chr(45)_). This affects the columns that record the winter and summer offsets from _UTC_ and the latitude and longitude. The built-in function _replace()_ is used to correct this anomaly.

The kit also includes the code to create the user-defined function _[jan_and_jul_tz_abbrevs_and_offsets()](../catalog-views/#the-jan-and-jul-tz-abbrevs-and-offsets-table-function)_ that the creation of the _extended_timezone_names_ view depends upon. It also creates the views that are used to produce the lists that this page's child pages show.
{{< /tip >}}

## Overview

This page has these child pages:

- [extended_timezone_names — unrestricted, full projection](./unrestricted-full-projection/)
- [Real timezones that observe Daylight Savings Time](./canonical-real-country-with-dst/)
- [Real timezones that don't observe Daylight Savings Time](./canonical-real-country-no-dst/)
- [Synthetic timezones (do not observe Daylight Savings Time)](./canonical-no-country-no-dst/)

The _pg_timezone_names_ view is populated from the _[tz database](https://en.wikipedia.org/wiki/Tz_database)_:

> The _tz database_ is a collaborative compilation of information about the world's time zones, primarily intended for use with computer programs and operating systems... [It has] the organizational backing of [ICANN](https://en.wikipedia.org/wiki/ICANN). The _tz database_ is also known as _tzdata_, the _zoneinfo database_ or _IANA time zone database_...

The population of _pg_timezone_names_ is refreshed with successive PostgreSQL Versions. As of the release date of PostgreSQL Version 13.2, the set of names in the _pg_timezone_names_ view in that environment is identical to the set of names in the _tz database_. (The _name_ column in each is unique.) But YugabyteDB Version 2.4, based on PostgreSQL Version 11.2, has three small discrepancies and a few other inconsistencies. [GitHub issue #8550](https://github.com/yugabyte/yugabyte-db/issues/8550) tracks this.

The _pg_timezone_names_ view shows a projection of the _tz database_'s columns.
And the [List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) shows a different, but overlapping projection. - -It's useful, therefore, to join these two projections as the _extended_timezone_names_ view. - -## Create the 'extended_timezone_names' view - -The _extended_timezone_names_ view is created as the inner join of _pg_timezone_names_ and the _tz_database_time_zones_extended_ table, created by the code kit. The user-defined table function _[ jan_and_jul_tz_abbrevs_and_offsets()](../catalog-views/#the-jan-and-jul-tz-abbrevs-and-offsets-table-function)_ is used to populate this table from the staging table for the data from the [List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) page by adding the columns _std_abbrev_ (the Standard Time timezone abbreviation) and _dst_abbrev_ (the Summer Time timezone abbreviation). - -Various quality checks are made during the whole process. These discover a few more anomalies. These, too, are tracked by [GitHub issue #8550](https://github.com/yugabyte/yugabyte-db/issues/8550). You can see all this in the downloaded code kit. Look for the spool file _YB-QA-reports.txt_. You can also install the kit using PostgreSQL. This will spool a corresponding _PG-QA-reports.txt_ file. - -Here's an example query that selects all of the columns from the _extended_timezone_names_ view for three example timezones.: - -```plpgsql -\x on -select - name, - abbrev, - std_abbrev, - dst_abbrev, - to_char_interval(utc_offset) as utc_offset, - to_char_interval(std_offset) as std_offset, - to_char_interval(dst_offset) as dst_offset, - is_dst::text, - country_code, - lat_long, - region_coverage, - status -from extended_timezone_names -where name in ('America/Los_Angeles', 'Asia/Manila', 'Europe/London') -order by name; -\x off -``` - -This is the result: - -```output -name | America/Los_Angeles -abbrev | PDT -std_abbrev | PST -dst_abbrev | PDT -utc_offset | -07:00 -std_offset | -08:00 -dst_offset | -07:00 -is_dst | true -country_code | US -lat_long | +340308-1181434 -region_coverage | Pacific -status | Canonical -----------------+-------------------- -name | Asia/Manila -abbrev | PST -std_abbrev | PST -dst_abbrev | PST -utc_offset | 08:00 -std_offset | 08:00 -dst_offset | 08:00 -is_dst | false -country_code | PH -lat_long | +1435+12100 -region_coverage | -status | Canonical -----------------+-------------------- -name | Europe/London -abbrev | BST -std_abbrev | GMT -dst_abbrev | BST -utc_offset | 01:00 -std_offset | 00:00 -dst_offset | 01:00 -is_dst | true -country_code | GB -lat_long | +513030-0000731 -region_coverage | -status | Canonical -``` - -Notice that the abbreviation _PST_ has two different meanings, as was emphasized in the section [The columns _pg_timezone_names.abbrev_ and _pg_timezone_abbrevs.abbrev_ record different kinds of facts](../#the-columns-pg-timezone-names-abbrev-and-pg-timezone-abbrevs-abbrev-record-different-kinds-of-facts). - -The installation of the code kit finishes by spooling the _Markdown_ source snippets that define the lists that are presented on this page's child pages. 
diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/timezone-sensitive-operations/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/timezone-sensitive-operations/_index.md deleted file mode 100644 index 54e3d57b0fb6..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/timezone-sensitive-operations/_index.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: Scenarios that are sensitive to the timezone/UTC offset [YSQL] -headerTitle: Scenarios that are sensitive to the UTC offset or explicitly to the timezone -linkTitle: Offset/timezone-sensitive operations -description: Explains the scenarios that are sensitive to the UTC offset and possibly, additionally, to the timezone. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: timezone-sensitive-operations - parent: timezones - weight: 30 -type: indexpage -showRightNav: true ---- - -All possible operations are inevitably executed in the context of a specified _UTC offset_ because the default scheme for the _TimeZone_ session setting ensures that this is never a zero-length _text_ value or _null_. (See the section [Specify the _UTC offset_ using the session environment parameter _TimeZone_](../syntax-contexts-to-spec-offset/#specify-the-utc-offset-using-the-session-environment-parameter-timezone).) The _TimeZone_ setting might specify the _UTC offset_ directly as an _interval_ value or it might specify it indirectly by identifying the timezone. - -However, only _three_ operations are sensitive to the setting: - -- The conversion of a plain _timestamp_ value to a _timestamptz_ value. -- The conversion of a _timestamptz_ value to a plain _timestamp_ value. -- Adding or subtracting an _interval_ value to/from a _timestamptz_ value. - -## Converting between plain timestamp values and timestamptz values - -The detail is explained in the section [Sensitivity of converting between _timestamptz_ and plain _timestamp_ to the _UTC offset_](./timestamptz-plain-timestamp-conversion/). That section defines the semantics of the conversions. - -- Other conversions where the source or target data type is _timestamptz_ exhibit sensitivity to the _UTC offset_; but this can always be understood as a transitive sensitivity to the fundamental _timestamptz_ to/from plain _timestamp_ conversions. The section [Typecasting between values of different date-time data types](../../typecasting-between-date-time-values/) calls out all of these cases. Here is an example: - - ```output - timestamptz_value::date = (timestamptz_value::timestamp)::date - ``` - -- You can convert between a _timestamptz_ value and a plain _timestamp_ value using either the _::timestamp_ typecast or the _at time zone_ operator. The former approach is sensitive to the current _TimeZone_ session setting. And the latter approach, because the _UTC offset_ is specified explicitly (maybe directly as an _interval_ value or indirectly via an identified timezone) is insensitive to the current _TimeZone_ session setting. -- The built-in function overloads _timezone(timestamp, text)_ and _timezone(interval, text)_ have identical semantics to the _at time zone_ operator and it can be advantageous to prefer these. See the section [Recommended practice for specifying the _UTC offset_](../recommendation/). - -- You can create a _timestamptz_ value explicitly using either the _make_timestamptz()_ built-in or a _text_ literal. 
In each case, you can identify the timezone, or supply an _interval_ value, directly in the syntax; or you can elide this information and let it be taken from the current _TimeZone_ session setting. The full explanations are given in the section [Specify the _UTC offset_ explicitly within the text of a timestamptz literal or for make_interval()'s 'timezone' parameter](../syntax-contexts-to-spec-offset/#specify-the-utc-offset-explicitly-within-the-text-of-a-timestamptz-literal-or-for-make-interval-s-timezone-parameter) and in the [_text_ to _timestamptz_](../../typecasting-between-date-time-values/#text-to-timestamptz) subsection on the [Typecasting between values of different date-time data types](../../typecasting-between-date-time-values/) page. - -## Adding or subtracting an interval value to/from a timestamptz value - -The section [The sensitivity of _timestamptz-interval_ arithmetic to the current timezone](./timestamptz-interval-day-arithmetic/) defines the semantics. Briefly, the outcome of the operation is sensitive as follows: - -- The sensitivity is specific to adding or subtracting an _interval_ value to or from exactly and only a _timestamptz_ value. - -- The sensitivity is specific to only the _dd_ value of the _[\[mm, dd, ss\]](../../date-time-data-types-semantics/type-interval/interval-representation/)_ internal representation of an _interval_ value. - -- Only the session's _TimeZone_ setting matters. There is no explicit syntax, analogous to the _at time zone_ operator or its equivalent _timezone()_ function overloads to let you override the session's setting. - -- The potential sensitivity requires that the session's _TimeZone_ setting identifies a timezone name, and not an explicit _UTC offset_. - -- The identified timezone must specify a Daylight Savings regime. - -- The range between the starting _timestamptz_ value and the ending _timestamptz_ value that the _interval_ specifies must span either the "spring forward" moment or the "fall back" moment. diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/_index.md b/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/_index.md deleted file mode 100644 index f8ddad9c200d..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/_index.md +++ /dev/null @@ -1,151 +0,0 @@ ---- -title: Four ways to specify the UTC offset [YSQL] -headerTitle: Four ways to specify the UTC offset -linkTitle: Four ways to specify offset -description: Explains the four ways to specify the UTC offset. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: ways-to-spec-offset - parent: timezones - weight: 40 -type: indexpage -showRightNav: true ---- - -The _UTC offset_ is, ultimately, expressed as an [_interval_](../../date-time-data-types-semantics/type-interval/) value. It can be specified in various different ways as this page explains. - -{{< tip title="Yugabyte recommends using only a timezone name or an explicit 'interval' value to specify the 'UTC offset'." >}} -See the section [Recommended practice for specifying the _UTC offset_](../recommendation/). It presents a sufficient way to achieve all the functionality that you could need while protecting you from the many opportunities to go wrong brought by using the native functionality with no constraints. 
-{{< /tip >}} - -### Directly as an interval value - -To specify an _interval_ value for the session's _'TimeZone'_ setting, you must use the _set time zone_ syntax and not the _set timezone =_ syntax. For example: - -```plpgsql -set time zone interval '-7 hours'; -``` - -Notice that, in this syntax context, the _interval_ value can be specified only using the type name constructor. Each of these attempts: - -```plpgsql -set time zone '-7 hours'::interval; -``` - -and: - -```plpgsql -set time zone make_interval(hours => -7); -``` - -causes this error: - -```output -42601: syntax error -``` - -This reflects an insuperable parsing challenge brought by the very general nature of the _set_ statement. (For example, it has variants for setting the transaction isolation level.) - -In contrast, the _at time zone_ operator allows any arbitrary _interval_ expression. Try this: - -```plpgsql -select '2021-05-27 12:00:00'::timestamp at time zone (make_interval(hours=>5) + make_interval(mins=>45)); -``` - -You can also specify an _interval_ value within the text of a _timestamptz_ literal. But here, you use just the text that would be used with the _::interval_ typecast. Try this: - -```plpgsql -select '2021-05-27 12:00:00 -03:15:00'::timestamptz; -``` -The same rule applies if you use the _make_timestamptz()_ built-in function. Its _timezone_ formal parameter has data type _text_. There is no overload where this parameter has data type _interval_. - -### Directly using POSIX syntax - -The syntax is described in the PostgreSQL documentation in the appendix [B.5. POSIX Time Zone Specifications](https://www.postgresql.org/docs/15/datetime-posix-timezone-specs.html). This allows you to specify the two _UTC offset_ values, one for Standard Time and one for Summer Time, along with the "spring forward" and "fall back" moments. It's exceedingly unlikely that you will need to use this because, these days, everywhere on the planet falls within a canonically-named timezone that keys to the currently-understood rules for Daylight Savings Time from the indefinite past through the indefinite future. The rules are accessible to the PostgreSQL and YugabyteDB servers. (The source is the so-called _[tz database](https://en.wikipedia.org/wiki/Tz_database)_.) If a committee decision changes any rules, then the _tz database_ is updated and the new rules are thence adopted into the configuration data for PostgreSQL and YugabyteDB. Look at the tables on these two pages: [Real timezones that observe Daylight Savings Time](../extended-timezone-names/canonical-real-country-with-dst/); and [Real timezones that don't observe Daylight Savings Time](../extended-timezone-names/canonical-real-country-no-dst/). - -Executing _show timezone_ after it has been set to an explicit _interval_ value reports back using POSIX syntax—albeit a simple form that doesn't specify Daylight Savings Time transitions. Try this: - -```plpgsql -set time zone interval '-07:30:00'; -show timezone; -``` - -This is the result: - -```output - <-07:30>+07:30 -``` - -This is a legal argument for _set timezone_ thus: - -```plpgsql -set timezone = '<-07:30>+07:30'; -show timezone; -``` - -Of course, the result is exactly the same as what you set. It turns out that almost _any_ string that contains one or more digits can be interpreted as POSIX syntax.
Try this: - -```plpgsql -set timezone = 'FooBar5'; -show timezone; -``` - -This is the result: - -```output - TimeZone ----------- - FOOBAR5 -``` - -You can easily confirm that _FOOBAR5_ is not found in any of the columns (_pg_timezone_names.name_, _pg_timezone_names.abbrev_, or _pg_timezone_abbrevs.abbrev_) where timezone names or abbreviations are found. - -Now see what effect this has, like this: - -```plpgsql -\set bare_date_time '\'2021-04-15 12:00:00\'' -select :bare_date_time::timestamptz; -``` - -This is the result: - -```output - 2021-04-15 12:00:00-05 -``` - -POSIX takes positive numbers to mean west of the Greenwich Meridian. And yet the PostgreSQL convention for displaying such a _UTC offset_ is to show it as a _negative_ value. (See [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601).) This seems to be the overwhelmingly more common convention. Internet searches seem always to show timezones for places west of Greenwich with negative _UTC offset_ values. Try this: - -```plpgsql -set time zone interval '-5 hours'; -select :bare_date_time::timestamptz; -``` - -Here, the PostgreSQL convention was used to specify the _UTC offset_ value. Using the same _:bare_date_time_ text literal, the result, _2021-04-15 12:00:00-05_, is identical to what it was when the timezone was set to _FooBar5_. - -Next, try this: - -```plpgsql -set timezone = 'Foo5Bar'; -select :bare_date_time::timestamptz; -``` - -Now the result has changed: - -```output -2021-04-15 12:00:00-04 -``` - -What? With the timezone set to _FooBar5_, the result of casting _2021-04-15 12:00:00_ to _timestamptz_ has a _UTC offset_ value of _negative five_ hours. But with the timezone set to _Foo5Bar_, the result of casting the same plain _timestamp_ value to _timestamptz_ has a _UTC offset_ value of _negative four_ hours. You can guess that this has something to do with the way POSIX encodes Daylight Savings Time rules—and with the defaults that are defined (in this case, Summer Time "springs forward" by _one hour_) when the tersest specification of Daylight Savings Time rules is given by using arbitrary text (in the example, at least) after the last digit in the POSIX text. - -### Indirectly using a timezone name - -This is overwhelmingly the preferred approach because it's this, and only this, that brings you the beneficial automatic mapping to the _UTC offset_ value that reigns at the moment of execution of a sensitive operation according to the rules for Daylight Savings Time that the name keys to in the internal representation of the [_tz database_](https://en.wikipedia.org/wiki/Tz_database). (See the section [Scenarios that are sensitive to the _UTC offset_ or explicitly to the timezone](../timezone-sensitive-operations/).) The names are automatically looked up in the _pg_timezone_names_ catalog view. See the section [Rules for resolving a string that's intended to identify a _UTC offset_](./name-res-rules/) - -### Indirectly using a timezone abbreviation - -{{< tip title="Avoid using this approach." >}} -Though this approach is legal, Yugabyte strongly recommends that you avoid using it. -{{< /tip >}} - -The best that this approach can do for you is to bring you a fixed value for the _UTC offset_ that's independent of the moment of lookup. But this is an exceedingly rare use case. If this is what you want, you can do it in a self-documenting way by specifying the _UTC offset_ [Directly as an _interval_ value](#directly-as-an-interval-value) as was explained above. 
Moreover, there are other risks brought by the attempt to use a timezone abbreviation to specify a _UTC offset_. See the section [Rules for resolving a string that's intended to identify a _UTC offset_](./name-res-rules/). diff --git a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/_index.md deleted index caa0ee145f27..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_datetime/timezones/ways-to-spec-offset/name-res-rules/_index.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -title: Rules for resolving a string intended to identify a UTC offset [YSQL] -headerTitle: Rules for resolving a string that's intended to identify a UTC offset -linkTitle: Name-resolution rules -description: Explains the rules for resolving a string that's intended to identify a UTC offset. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: name-res-rules - parent: ways-to-spec-offset - weight: 10 -type: indexpage -showRightNav: true ---- - -## The rules - - -**Note:** If the text contains a digit, then it is taken as POSIX syntax. See the appendix [B.5. POSIX Time Zone Specifications](https://www.postgresql.org/docs/15/datetime-posix-timezone-specs.html) in the PostgreSQL documentation. - -When a string is used to identify a _UTC offset_, there might seem _a priori_ to be three contexts in which it might be resolved: - -- _either_ as a _pg_timezone_names.name_ value -- _or_ as a _pg_timezone_names.abbrev_ value -- _or_ as a _pg_timezone_abbrevs.abbrev_ value - -Lest "seem" leave you guessing, _Rule 2_ says that not all contexts are used to resolve all lookups. - -Here are the rules for resolving a string that's intended to identify a _UTC offset_: - -- [Rule 1](./rule-1/) — Lookup of the string is case-insensitive (discounting, of course, using an explicit _select_ statement _from_ one of the _pg_timezone_names_ or _pg_timezone_abbrevs_ catalog views). -- [Rule 2](./rule-2/) — The string is never resolved in _pg_timezone_names.abbrev_. -- [Rule 3](./rule-3/) — The string is never resolved in _pg_timezone_abbrevs.abbrev_ as the argument of _set timezone_ but it is resolved there as the argument of _timezone()_ and within a _text_ literal for a _timestamptz_ value. -- [Rule 4](./rule-4/) — The string is resolved first in _pg_timezone_abbrevs.abbrev_ and, only if this fails, then in _pg_timezone_names.name_. This applies only in those syntax contexts where _pg_timezone_abbrevs.abbrev_ is a candidate for the resolution—so not for _set timezone_, which looks only in _pg_timezone_names.name_. - -The syntax contexts of interest are described in the section [Three syntax contexts that use the specification of a _UTC offset_](../../syntax-contexts-to-spec-offset/). - -**Note:** The code that substantiates _Rule 4_ is able to do this only because there do exist cases where the same string is found in _both_ resolution contexts but with different _utc_offset_ values in the two contexts.
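To see such cases for yourself, a minimal sketch like the following works in both PostgreSQL and YugabyteDB. It assumes only the documented _name_, _abbrev_, and _utc_offset_ columns of the two catalog views; the _upper()_ calls reflect the case-insensitive lookup that _Rule 1_ describes.

```plpgsql
-- List strings that exist both as a pg_timezone_names.name value and as a
-- pg_timezone_abbrevs.abbrev value, but with different utc_offset values.
select
  n.name,
  n.utc_offset as names_utc_offset,
  a.utc_offset as abbrevs_utc_offset
from pg_timezone_names as n
inner join pg_timezone_abbrevs as a
  on upper(a.abbrev) = upper(n.name)
where n.utc_offset <> a.utc_offset
order by n.name;
```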
- -This table summarizes [Rule 2](./rule-2/), [Rule 3](./rule-3/), and [Rule 4](./rule-4/): - -| Syntax context \ View column | pg_timezone_names.name | pg_timezone_names.abbrev | pg_timezone_abbrevs.abbrev | -| ----------------------------------------------- | ---------------------- | --------------------------- |------------------------------------------- | -| _set timezone_[\[1\]](#note-1) | **here only** _[Rule 3](./rule-3/)_ | never _[Rule 2](./rule-2/)_ | not for set timezone | -| _at time zone_[\[2\]](#note-2) | **second** priority _[Rule 4](./rule-4/)_ | never _[Rule 2](./rule-2/)_ | **first** priority | -| _timestamptz_ value [\[3\]](#note-3) | **second** priority _[Rule 4](./rule-4/)_ | never _[Rule 2](./rule-2/)_ | **first** priority | - -**Note 1:** This row applies for the two alternative syntax spellings: - -```plpgsql - set timezone = 'Europe/Amsterdam'; - set time zone 'Europe/Amsterdam'; -``` - -**Note 2:** This row applies for both the _operator syntax_ and the _function syntax_: - -```plpgsql - select ( - (select '2021-06-02 12:00:00'::timestamp at time zone 'Europe/Amsterdam') = - (select timezone('Europe/Amsterdam', '2021-06-02 12:00:00'::timestamp)) - )::text; -``` - -You usually see the _operator syntax_ in blog posts and the like. But there are good reasons to prefer the _function syntax_ in industrial strength application code. The section [Recommended practice for specifying the _UTC offset_](../../recommendation/) explains why and encourages you to use the overloads of the _timezone()_ built-in function only via the user-defined wrapper function [_at_timezone()_](../../recommendation/#the-at-timezone-function-overloads). - -**Note 3:** This row applies for both the _::timestamptz_ typecast of a _text_ literal and the invocation of the _make_timestamptz()_ built-in function: - -```plpgsql - select ( - (select '2021-06-02 12:00:00 Europe/Amsterdam'::timestamptz) = - (select make_timestamptz(2021, 6, 2, 12, 0, 0, 'Europe/Amsterdam')) - )::text; -``` - -## Summary - -The rules for resolving a string that's intended to specify a _UTC offset_ can be summarized thus: - -- The resolution of a string is case-insensitive. -- A string is never resolved in _pg_timezone_names.abbrev_. -- A string is always resolved in _pg_timezone_names.name_. -- A string used in _set timezone_ is resolved only in _pg_timezone_names.name_. -- A string that's used in _at time zone_ or in the explicit specification of a _timestamptz_ value is resolved first in _pg_timezone_abbrevs.abbrev_ and only if this fails, then in _pg_timezone_names.name_. -- If a string escapes all of the attempts at resolution that the previous five bullet points set out, then an attempt is made to resolve it as [POSIX](https://www.postgresql.org/docs/15/datetime-posix-timezone-specs.html) syntax. diff --git a/docs/content/preview/api/ysql/datatypes/type_json/_index.md deleted index 0b6f14c4a240..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_json/_index.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: JSON data types and functionality [YSQL] -headerTitle: JSON data types and functionality -linkTitle: JSON -summary: JSON and JSONB data types -description: Learn about YSQL support for JSON data types (json and jsonb) and their functions and operators.
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: api-ysql-datatypes-json - parent: api-ysql-datatypes -aliases: - - /preview/api/ysql/datatypes/type_json -type: indexpage -showRightNav: true ---- - -## Synopsis - -JavaScript Object Notation (JSON) is a text format for the serialization of structured data. Its syntax and semantics are defined in [RFC 7159](https://tools.ietf.org/html/rfc7159). JSON is represented by Unicode characters, and such a representation is usually called a _document_. Whitespace outside of _string_ values and _object_ keys (see below) is insignificant. - -YSQL supports two data types for representing a JSON document: `json` and `jsonb`. Both data types reject any JSON document that does not conform to RFC 7159. The `json` data type stores the text representation of a JSON document as presented. In contrast, the `jsonb` data type stores a parsed representation of the document hierarchy of subvalues in an appropriate internal format. Some people prefer the mnemonic _"binary"_ for the _"b"_ suffix; others prefer _"better"_. Of course, it takes more computation to store a JSON document as a `jsonb` value than as a `json` value. This cost is repaid when subvalues are operated on using the operators and functions described in this section. - -JSON was invented as a data interchange format, initially to allow an arbitrary compound value in a JavaScript program to be serialized, transported as text, and then deserialized in another JavaScript program faithfully to reinstantiate the original compound value. Later, many other programming languages (including, now, SQL and PL/pgSQL) support serialization to, and deserialization from, JSON. Moreover, it has become common to store JSON as the persistent representation of a record in a table with just a primary key column and a `json` or `jsonb` column for facts that could be represented classically in a table design that conforms to the relational model. This pattern arose first in NoSQL databases, but it is now widespread in SQL databases. - -## Description - -```ebnf -type_specification ::= { json | jsonb } -``` - -The following topics in this section discuss further details about JSON data types and functionality: - -- [JSON literals](../type_json/json-literals/) -- [Primitive and compound JSON data types](../type_json/primitive-and-compound-data-types/) -- [Code example conventions](../type_json/code-example-conventions/) -- [Create indexes and check constraints on `jsonb` and `json` columns](../type_json/create-indexes-check-constraints/) -- [JSON functions and operators](../type_json/functions-operators/) diff --git a/docs/content/preview/api/ysql/datatypes/type_json/functions-operators/_index.md deleted index 3d84d1676e65..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_json/functions-operators/_index.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: JSON functions and operators -headerTitle: JSON functions and operators -linkTitle: Functions & operators -summary: Functions and operators -description: Learn about JSON functions and operators categorized by the goal you want to accomplish.
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: json-functions-operators - parent: api-ysql-datatypes-json - weight: 50 -type: indexpage -showRightNav: true ---- - -**Note:** For an alphabetical listing of the JSON functions and operators, see the listing in the navigation bar. - -There are two trivial typecast operators for converting between a `text` value that conforms to [RFC 7159](https://tools.ietf.org/html/rfc7159) and a `jsonb` or `json` value, the ordinarily overloaded `=` operator, 12 dedicated JSON operators, and 23 dedicated JSON functions. - -Most of the operators are overloaded so that they can be used on both `json` and `jsonb` values. When such an operator reads a subvalue as a genuine JSON value, then the result has the same data type as the input. When such an operator reads a subvalue as a SQL `text` value that represents the JSON value, then the result is the same for a `json` input as for a `jsonb` input. - -Some of the functions have just a `jsonb` variant and a couple have just a `json` variant. Function names reflect this by starting with `jsonb_` or ending with `_jsonb`—and, correspondingly, for the `json` variants. The reason that this naming convention is used, rather than ordinary overloading, is that YSQL can distinguish between same-named functions when the specifications of their formal parameters differ but not when their return types differ. Some of the JSON functions for a specific purpose differ only by returning a `json` value or a `jsonb` value. This is why a single consistent naming convention, a `b` variant and a plain variant, is used throughout. - -When an operator or function has both a JSON value input and a JSON value output, the `jsonb` variant takes a `jsonb` input and produces a `jsonb` output; and, correspondingly, the `json` variant takes a `json` input and produces a `json` output. You can use the `ysqlsh` [`\df`](../../../../ysqlsh-meta-commands/#df-antws-pattern) meta-command to show the signature (that is, the data types of the formal parameters and the return value) of any of the JSON functions; but you cannot do this for the operators. - -Check the full account of each to find its variant status. When an operator or function has both a `jsonb` and a `json` variant, then only the `jsonb` variant is described. The functionality of the `json` variant can be trivially understood from the account of the `jsonb` functionality. - -To avoid clutter in the tables, only the `jsonb` variants of the function names are mentioned except where only a `json` variant exists. - -## Convert a SQL value to a JSON value - -| Function or operator | jsonb | json | Description | -| ---- | ---- | ---- | ---- | -| [`::jsonb`](./typecast-operators/#) | yes | yes | `::jsonb` typecasts a SQL `text` value that conforms to RFC 7159 to a `jsonb` value. Use the appropriate one of `::jsonb`, `::json`, or `::text` to typecast between any pair out of `text`, `json`, and `jsonb`, in the direction that you need. | -| [`to_jsonb()`](./to-jsonb/) | yes | yes | Convert a single SQL value of any primitive or compound data type, that allows a JSON representation, to a semantically equivalent `jsonb`, or `json`, value. | -| [`row_to_json()`](./row-to-json/) | | yes | Create a JSON _object_ from a SQL _record_. It has no practical advantage over `to_jsonb()`. | -| [`array_to_json()`](./array-to-json/) | | yes | Create a JSON _array_ from a SQL array value. It has no practical advantage over `to_jsonb()`.
| -| [`jsonb_build_array()`](./jsonb-build-array/) | yes | yes | Create a JSON _array_ from a variadic list of values of arbitrary SQL data type. | -| [`jsonb_build_object()`](./jsonb-build-object/) | yes | yes | Create a JSON _object_ from a variadic list that specifies keys with values of arbitrary SQL data type. | -| [`jsonb_object()`](./jsonb-object/) | yes | yes | Create a JSON _object_ from SQL array values that specify keys with their values of SQL data type `text`. | -| [`jsonb_agg()`](./jsonb-agg/) | yes | yes | This is an aggregate function. (Aggregate functions compute a single result from a `SETOF` input SQL values.) It creates a JSON _array_ whose values are the JSON representations of the aggregated SQL values. | -| [`jsonb_object_agg()`](./jsonb-object-agg/) | yes | yes | This is an aggregate function. (Aggregate functions compute a single result from a `SETOF` input values.) It creates a JSON _object_ whose values are the JSON representations of the aggregated SQL values. It is most useful when these to-be-aggregated values are _"row"_ type values with two fields. The first represents the _key_ and the second represents the _value_ of the intended JSON _object_'s _key-value_ pair. | - -## Convert a JSON value to another JSON value - -| Function or operator | jsonb | json | Description | -| ---- | ---- | ---- | ---- | -| [`->`](./subvalue-operators/) | yes | yes | Read the value specified by a one-step path returning it as a `json` or `jsonb` value. | -| [`#>`](./subvalue-operators/) | yes | yes | Read the value specified by a multi-step path returning it as a `json` or `jsonb` value. | -| [||](./concatenation-operator/) | yes | | Concatenate two `jsonb` values. The rule for deriving the output value depends upon the JSON data types of the operands. | -| [`-`](./remove-operators/) | yes | | Remove key-value pair(s) from an _object_ or a single value from an _array_. | -| [`#-`](./remove-operators) | yes | | Remove a single key-value pair from an _object_ or a single value from an _array_ at the specified path. | -| [`jsonb_extract_path()`](./jsonb-extract-path/) | yes | yes | Provide the identical functionality to the `#>` operator. The path is presented as a variadic list of steps that must all be `text` values. Its invocation is more verbose than that of the `#>` operator and there is no reason to prefer the function form to the operator form. | -| [`jsonb_strip_nulls()`](./jsonb-strip-nulls/) | yes | yes | Find all key-value pairs at any depth in the hierarchy of the supplied JSON compound value (such a pair can occur only as an element of an _object_) and return a JSON value where each pair whose value is _null_ has been removed. | -| [`jsonb_set()` and `jsonb_insert()`](./jsonb-set-jsonb-insert/) | yes | | Use `jsonb_set()` to change a JSON value, i.e. the value of an existing key-value pair in a JSON _object_ or the value at an existing index in a JSON array. Use `jsonb_insert()` to insert a value, either as the value for a key that doesn't yet exist in a JSON _object_ or beyond the end or before the start of the index range for a JSON _array_. | - -## Convert a JSON value to a SQL value - -| Function or operator | jsonb | json | Description | -| ---- | ---- | ---- | ---- | -| [`::text`](./typecast-operators/) | yes | yes | Typecast a `jsonb` value to a SQL `text` value that conforms to RFC 7159. Whitespace is conventionally defined for a `jsonb` operand. Whitespace, in general, is unpredictable for a `json` operand.
| -| [`->>`](./subvalue-operators/) | yes | yes | Like `->` except that the targeted value is returned as a SQL `text` value: _either_ the `::text` typecast of a compound JSON value; _or_ a typecastable `text` value holding the actual value that a primitive JSON value represents. | -| [`#>>`](./subvalue-operators/) | yes | yes | Like `->>` except that the to-be-read JSON subvalue is specified by the path to it from the enclosing JSON value. | -| [`jsonb_extract_path_text()`](./jsonb-extract-path-text/) | yes | yes | Provide the identical functionality to the `#>>` operator. There is no reason to prefer the function form to the operator form. | -| [`jsonb_populate_record()`](./jsonb-populate-record/) | yes | yes | Convert a JSON _object_ into the equivalent SQL `record`. | -| [`jsonb_populate_recordset()`](./jsonb-populate-recordset/) | yes | yes | Convert a homogeneous JSON _array_ of JSON _objects_ into the equivalent set of SQL _records_. | -| [`jsonb_to_record()`](./jsonb-to-record/) | yes | yes | Convert a JSON _object_ into the equivalent SQL `record`. Syntax variant of the functionality that `jsonb_populate_record()` provides. It has some restrictions and brings no practical advantage over its less restricted equivalent. | -| [`jsonb_to_recordset()`](./jsonb-to-recordset/) | yes | yes | Bears the same relationship to `jsonb_to_record()` as `jsonb_populate_recordset()` bears to `jsonb_populate_record()`. Therefore, it brings no practical advantage over its restricted equivalent. | -| [`jsonb_array_elements()`](./jsonb-array-elements/) | yes | yes | Transform the JSON values of JSON _array_ into a SQL table of (i.e. `SETOF`) `jsonb` values. | -| [`jsonb_array_elements_text()`](./jsonb-array-elements-text/) | yes | yes | Transform the JSON values of JSON _array_ into a SQL table of (i.e. `SETOF`) `text` values. | -| [`jsonb_each()`](./jsonb-each/) | yes | yes | Create a row set with columns _"key"_ (as a SQL `text`) and _"value"_ (as a SQL `jsonb`) from a JSON _object_. | -| [`jsonb_each_text()`](./jsonb-each-text/) | yes | yes | Create a row set with columns _"key"_ (as a SQL `text`) and _"value"_ (as a SQL `text`) from a JSON _object_. | -| [`jsonb_pretty()`](./jsonb-pretty/) | yes | | Format the text representation of the JSON value that the input `jsonb` actual argument represents, using whitespace, to make it maximally human readable. | - -## Get a property of a JSON value - -| Function or operator | jsonb | json | Description | -| ---- | ---- | ---- | ---- | -| [`=`](./equality-operator/) | yes | | Test if two `jsonb` values are equal. | -| [`@>` and `<@`](./containment-operators/) | yes | | The `@>` operator tests if the left-hand JSON value contains the right-hand JSON value. The `<@` operator tests if the right-hand JSON value contains the left-hand JSON value. | -| [?, ?|, and ?&](./key-or-value-existence-operators/) | yes | | (1) If the left-hand JSON value is an _object_, test if the right-hand SQL text value(s) exist as key name(s) in the _object_. (2) If the left-hand JSON value is an _array_, test if the right-hand SQL text value(s) exist as JSON _string_ value(s) in the _array_. | -| [`jsonb_array_length()`](./jsonb-array-length/) | yes | | Return the count of values (primitive or compound) in the array. You can use this to iterate over the elements of a JSON _array_ using the `->` operator. | -| [`jsonb_typeof()`](./jsonb-typeof/) | yes | | Return the data type of the JSON value as a SQL `text` value. 
| -| [`jsonb_object_keys()`](./jsonb-object-keys/) | yes | | Transform the list of key names in the supplied JSON _object_ into a set (i.e. table) of `text` values. | diff --git a/docs/content/preview/api/ysql/datatypes/type_uuid.md deleted index 494ca7194ff9..000000000000 --- a/docs/content/preview/api/ysql/datatypes/type_uuid.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: UUID data type [YSQL] -headerTitle: UUID data type -linkTitle: UUID -description: Represents Universally Unique Identifiers (UUIDs). -menu: - preview_api: - identifier: api-ysql-datatypes-uuid - parent: api-ysql-datatypes -aliases: - - /preview/api/ysql/datatypes/type_uuid -type: docs ---- - -## Synopsis - -The `UUID` data type represents Universally Unique Identifiers (UUIDs). A UUID is a sequence of 32 hexadecimal digits separated by hyphens (8 digits - 4 digits - 4 digits - 4 digits - 12 digits) representing the 128 bits. - -## Description - -```ebnf -type_specification ::= UUID -``` - -## Examples - -```output -ffffffff-ffff-ffff-ffff-ffffffffffff -{aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa} -12341234-1234-1234-1234-123412341234 -``` diff --git a/docs/content/preview/api/ysql/exprs/_index.md deleted index e86ccf79e8bd..000000000000 --- a/docs/content/preview/api/ysql/exprs/_index.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: Built-in functions and operators [YSQL] -headerTitle: Built-in functions and operators -linkTitle: Built-in functions and operators -description: YSQL supports all PostgreSQL-compatible built-in functions and operators. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: api-ysql-exprs - parent: ysql-language-elements - weight: 60 -type: indexpage ---- - -YSQL supports all PostgreSQL-compatible built-in functions and operators. The following are the currently documented ones.
- -| Function | Description | -|-----------|-------------| -| [yb_index_check()](func_yb_index_check) | Checks if the given index is consistent with its base relation | -| [yb_hash_code()](func_yb_hash_code) | Returns the partition hash code for a given set of expressions | -| [gen_random_uuid()](func_gen_random_uuid) | Returns a random UUID | -| [Sequence functions](sequence_functions/) | Functions operating on sequences | -| [Geo-partitioning helper functions](./geo_partitioning_helper_functions/) | Detailed list of geo-partitioning helper functions | -| [JSON functions and operators](../datatypes/type_json/functions-operators/) | Detailed list of JSON-specific functions and operators | -| [Array functions and operators](../datatypes/type_array/functions-operators/) | Detailed list of array-specific functions and operators | -| [Aggregate functions](./aggregate_functions/) | Detailed list of YSQL aggregate functions | -| [Window functions](./window_functions/) | Detailed list of YSQL window functions | -| [Date-time operators](../datatypes/type_datetime/operators/) | List of operators for the date and time data types | -| [General-purpose date functions](../datatypes/type_datetime/functions/) | List of general purpose functions for the date and time data types | -| [Date-time formatting functions](../datatypes/type_datetime/formatting-functions/) | List of formatting functions for the date and time data types | diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/_index.md deleted index 1e800a243157..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/_index.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -title: YSQL aggregate functions -linkTitle: Aggregate functions -headerTitle: Aggregate functions -description: This major section describes the syntax and semantics of all of the aggregate functions that YSQL supports. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: aggregate-functions - parent: api-ysql-exprs - weight: 30 -type: indexpage -showRightNav: true ---- -If you are already familiar with aggregate functions, then you can skip straight to the [syntax and semantics](./invocation-syntax-semantics/) section or the section that lists all of [the YSQL aggregate functions](./function-syntax-semantics/) and that links, in turn, to the definitive account of each function. - -This page has only the [Synopsis](./#synopsis) section and the [Organization of the aggregate functions documentation](./#organization-of-the-aggregate-functions-documentation) section. - -## Synopsis - -Aggregate functions operate on a set of values and return a single value that reflects a property of the set. The functions [`count()`](./function-syntax-semantics/avg-count-max-min-sum/#count) and [`avg()`](./function-syntax-semantics/avg-count-max-min-sum/#avg) are very familiar examples. - -In the limit, the values in the set that the aggregate function operates on are taken from the whole of the result set that the `FROM` list defines, subject to whatever restriction the subquery's `WHERE` clause might define. Very commonly, the set in question is split into subsets according to what the `GROUP BY` clause specifies. - -Many aggregate functions may be invoked, not only using the ordinary syntax where `GROUP BY` might be used, but also as [window functions](../window_functions/).
- -Notice these differences and similarities between aggregate functions and window functions: - -- A window function produces, in general, a different output value for _each different input row_ in the [_window_](../window_functions/invocation-syntax-semantics/#the-window-definition-rule). -- When an aggregate function is invoked using the regular `GROUP BY` clause, it produces a _single value_ for each entire subset that the `GROUP BY` clause defines. -- When an aggregate function is invoked in the same way as a window function, it might, or might not, produce the _same value_ for _each different input row_ in the [_window_](./invocation-syntax-semantics/#the-window-definition-rule). The exact behavior depends on what the [frame clause](../window_functions/invocation-syntax-semantics/#the-frame-clause-1) specifies. -- All of the thirty-seven aggregate functions are listed in the four tables in the section [Signature and purpose of each aggregate function](./function-syntax-semantics/). - -## Organization of the aggregate functions documentation - -The remaining pages are organized as follows: - -### Informal overview of function invocation using the GROUP BY clause - -**[Here](./functionality-overview/)**. Skip this section entirely if you are already familiar with aggregate functions. It presents code examples that classify the aggregate functions into three kinds according to how they may be invoked: - -- [ordinary aggregate functions](./functionality-overview/#ordinary-aggregate-functions) - -- [within-group ordered-set aggregate functions](./functionality-overview/#within-group-ordered-set-aggregate-functions) - -- [within-group hypothetical-set aggregate functions](./functionality-overview/#within-group-hypothetical-set-aggregate-functions) - -This section focuses on the _effect_ that each illustrated function has. It leaves formal definitions to the [invocation syntax and semantics](./invocation-syntax-semantics/) section and the [Signature and purpose of each aggregate function](./function-syntax-semantics/) section. - -### Aggregate function invocation—SQL syntax and semantics - -**[Here](./invocation-syntax-semantics/)**. This section presents the formal treatment of the syntax and semantics of how an aggregate function is invoked as a special kind of `SELECT` list item—with the invocation syntax optionally decorated with an `ORDER BY` clause, or a `FILTER` clause. This account also explains the use of the `HAVING` clause which lets you restrict a result set according to the value(s) returned by a list of aggregate functions. - -There are four variants of the `GROUP BY` invocation style: `GROUP BY `; `GROUP BY GROUPING SETS`; `GROUP BY ROLLUP`; and `GROUP BY CUBE`. Further, all but the bare `GROUP BY ` allow the use of a `GROUPING` keyword in the `SELECT` list to label the different `GROUPING SETS`. Because all of this requires a fairly lengthy explanation, this is covered in the dedicated section [`Using the GROUPING SETS, ROLLUP, and CUBE syntax for aggregate function invocation`](./grouping-sets-rollup-cube/). - -### Signature and purpose of each aggregate function - -**[Here](./function-syntax-semantics/)**. The following list groups the thirty-seven aggregate functions in the same way that the sidebar items group them. The rationale for the grouping is explained in the referenced sections. - -      [`avg()`](./function-syntax-semantics/avg-count-max-min-sum/#avg)
-      [`max()`](./function-syntax-semantics/avg-count-max-min-sum/#max-min)
-      [`min()`](./function-syntax-semantics/avg-count-max-min-sum/#max-min)
-      [`sum()`](./function-syntax-semantics/avg-count-max-min-sum/#sum) - -      [`array_agg()`](./function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#array-agg)
-      [`string_agg()`](./function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#string-agg)
-      [`jsonb_agg()`](./function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#jsonb-agg)
-      [`jsonb_object_agg()`](./function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#jsonb-object-agg) - -      [`bit_and()`](./function-syntax-semantics/bit-and-or-bool-and-or/#bit-and)
-      [`bit_or()`](./function-syntax-semantics/bit-and-or-bool-and-or/#bit-or)
-      [`bool_and()`](./function-syntax-semantics/bit-and-or-bool-and-or/#bool-and)
-      [`bool_or()`](./function-syntax-semantics/bit-and-or-bool-and-or/#bool-or) - -      [`variance()`](./function-syntax-semantics/variance-stddev/#variance)
-      [`var_pop()`](./function-syntax-semantics/variance-stddev/#var-pop)
-      [`var_samp()`](./function-syntax-semantics/variance-stddev/#var-samp)
-      [`stddev()`](./function-syntax-semantics/variance-stddev/#stddev)
-      [`stddev_pop()`](./function-syntax-semantics/variance-stddev/#stddev-pop)
-      [`stddev_samp()`](./function-syntax-semantics/variance-stddev/#stddev-samp) - -      [`covar_pop()`](./function-syntax-semantics/linear-regression/covar-corr/#covar-pop-covar-samp)
-      [`covar_samp()`](./function-syntax-semantics/linear-regression/covar-corr/#covar-pop-covar-samp)
-      [`corr()`](./function-syntax-semantics/linear-regression/covar-corr/#corr) - -      [`regr_avgy()`](./function-syntax-semantics/linear-regression/regr/#regr-avgy-regr-avgx)
-      [`regr_avgx()`](./function-syntax-semantics/linear-regression/regr/#regr-avgy-regr-avgx)
-      [`regr_count()`](./function-syntax-semantics/linear-regression/regr/#regr-count)
-      [`regr_slope()`](./function-syntax-semantics/linear-regression/regr/#regr-slope-regr-intercept)
-      [`regr_intercept()`](./function-syntax-semantics/linear-regression/regr/#regr-slope-regr-intercept)
-      [`regr_r2()`](./function-syntax-semantics/linear-regression/regr/#regr-r2)
-      [`regr_syy()`](./function-syntax-semantics/linear-regression/regr/#regr-syy-regr-sxx-regr-sxy)
-      [`regr_sxx()`](./function-syntax-semantics/linear-regression/regr/#regr-syy-regr-sxx-regr-sxy)
-      [`regr_sxy()`](./function-syntax-semantics/linear-regression/regr/#regr-syy-regr-sxx-regr-sxy) - -      [`mode()`](./function-syntax-semantics/mode-percentile-disc-percentile-cont/#mode)
-      [`percentile_disc()`](./function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont)
-      [`percentile_cont()`](./function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont) - -      [`rank()`](./function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#rank)
-      [`dense_rank()`](./function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#dense-rank)
-      [`percent_rank()`](./function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#percent-rank)
-      [`cume_dist()`](./function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#cume-dist) - -### Aggregate functions case study—the "68–95–99.7" rule - -**[Here](case-study-the-68-95-997-rule/)**. Regard this section as an optional extra. It shows the use of aggregate functions to demonstrate the so-called "68–95–99.7 rule"—described in [this Wikipedia article](https://en.wikipedia.org/wiki/68%e2%80%9395%e2%80%9399.7_rule). This case-study focuses on just one part of the rule: - -> 68.27% of the values in a normal distribution lie within one standard deviation each side of the mean. diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/_index.md deleted file mode 100644 index 024ca186bb07..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/_index.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: > - Case study: linear regression analysis of COVID data -linkTitle: > - Case study: linear regression on COVID data -headerTitle: > - Case study: linear regression analysis of COVID data from Carnegie Mellon's COVIDcast project -description: Case study—using the YSQL regr_r2(), regr_slope(), regr_intercept() to examine the correlation between COVID-like symptoms and mask-wearing using data from Carnegie Mellon's COVIDcast. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: covid-data-case-study - parent: aggregate-functions - weight: 110 -type: indexpage -showRightNav: true ---- -## Overview of the data and the code - -[Carnegie Mellon’s COVIDcast](https://covidcast.cmu.edu/) is an academic project that tracks real-time coronavirus statistics. The team uses various data collection methods and exposes data for download in various formats. This case study uses data that were collected using daily Facebook surveys with the aim of examining the possible correlation between wearing a face-mask and showing symptoms like those of SARS-CoV-2—hereinafter COVID. Specifically, three so-called signals are recorded. - -- Does the respondent wear a face mask? - -- Does the respondent have COVID-like symptoms? -- Does the respondent know someone in their community who has COVID-like symptoms? - -Each signal is expressed as a percentage relative to the number of people who answered the particular question. - -The download format, for each signal, is a comma-separated values file—hereinafter `.csv` file. The download page says this: - -> We are happy for you to use this [sic] data in products and publications. Please acknowledge us as a source: Data from Delphi COVIDcast, [covidcast.cmu.edu](https://covidcast.cmu.edu/). 
-This case study shows you how to use the `ysqlsh` `\COPY` meta-command to load each downloaded file into its own table, how to check that the values conform to rules that the COVIDcast team has documented, and how to join the rows in these staging tables into a single table with this format: - -``` -survey_date date not null } primary -state text not null } key -mask_wearing_pct numeric not null -mask_wearing_stderr numeric not null -mask_wearing_sample_size integer not null -symptoms_pct numeric not null -symptoms_stderr numeric not null -symptoms_sample_size integer not null -cmnty_symptoms_pct numeric not null -cmnty_symptoms_stderr numeric not null -cmnty_symptoms_sample_size integer not null -``` - -It then shows you how to use the linear-regression functions [`regr_r2()`](../function-syntax-semantics/linear-regression/regr/#regr-r2), [`regr_slope()`](../function-syntax-semantics/linear-regression/regr/#regr-slope-regr-intercept), and [`regr_intercept()`](../function-syntax-semantics/linear-regression/regr/#regr-slope-regr-intercept) to examine the correlation between mask-wearing and COVID-like symptoms. The section [Functions for linear regression analysis](../function-syntax-semantics/linear-regression/) explains the general background for these functions. - -The remaining account of this case-study is divided into three parts: - -- [How to find and download the COVIDcast data](./download-the-covidcast-data/) -- [How to ingest the data](./ingest-the-covidcast-data/), check that the values conform to the rules that the COVIDcast team has documented, and transform these into the single _"covidcast_fb_survey_results"_ table. -- [How to use YSQL aggregate functions to examine the possible correlation](./analyze-the-covidcast-data/) between wearing a face-mask and showing COVID-like symptoms. - -{{< tip title="Download a zip of all the files that this case study uses" >}} - -All of the `.sql` scripts that this case-study presents for copy-and-paste at the `ysqlsh` prompt are included for download in a zip-file. The zip also includes the three `csv` files that you will download from the [Carnegie Mellon COVIDcast](https://delphi.cmu.edu/covidcast/) site. This will allow you, after you've studied the account of the case study and run the files one by one, to run everything by starting a single master script that will ingest the data and spool the reports that this study explains to files. It will allow you easily to re-run the analysis on newer data as these become available. - -It is expected that the raw data will be available from the COVIDcast site into the indefinite future. But the downloadable self-contained zip-file of the complete case study assures readers of the longevity of this study's pedagogy. - -[Download `covid-data-case-study.zip`](https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/sample/covid-data-case-study/covid-data-case-study.zip). - -After unzipping it in a convenient new directory, you'll see a `README.txt`. It tells you to run `0.sql`. You'll see this in the top directory. It looks like this: - -```plpgsql -\i ingest-the-data.sql -\i analysis-queries.sql -\i synthetic-data.sql -``` - -Simply start it in `ysqlsh`. You can run it time and again. It always finishes silently. You can see the reports that it produces in the _"analysis-results"_ directory and confirm that the files that are spooled are identical to the corresponding reference copies that are delivered in the zip-file.
-{{< /tip >}} - -## Conclusion - -The function [`regr_r2()`](../function-syntax-semantics/linear-regression/regr/#regr-r2) implements a measure that the literature refers to as "R-squared". When the "R-squared" value is _0.6_, it means that _60%_ of the relationship of the putative _dependent_ variable (incidence of COVID-like symptoms) to the putative _independent_ variable (mask-wearing) is explained by a simple _"y = m*x + c"_ linear dependence—and that the remaining _40%_ is unexplained. A value greater than about _60%_ is generally taken to indicate that the putative _dependent_ variable really does depend upon the putative _independent_ variable. - -The downloaded COVIDcast data spanned a fifty day period (from 13-Sep-2020 through 1-Nov-2020). The value of "R-squared" was computed, in turn, for each of these days. It was greater than or equal to _60%_ on about _80%_ of these days. - -This outcome means that empirical evidence supports the claim that wearing a mask does indeed inhibit the spread of COVID. diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/_index.md deleted file mode 100644 index 24db586ed9cd..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/_index.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: Analyze the COVIDcast data -linkTitle: Analyze the COVIDcast data -headerTitle: Using the YSQL linear regression analysis functions on the COVIDcast data—introduction -description: Using regr_r2(), regr_slope(), regr_intercept() on the COVIDcast data—introduction -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: analyze-the-covidcast-data - parent: covid-data-case-study - weight: 30 -type: indexpage -showRightNav: true ---- - -## Introduction - -Try this query: - -```plpgsql -select max(symptoms_pct) from covidcast_fb_survey_results; -``` - -The result is about _2.7%_. This indicates that the signal "_symptoms_pct"_ (characterized on the [COVIDcast download page](../ingest-the-covidcast-data/inspect-the-csv-files/) by "Percentage of people with COVID-like symptoms, based on surveys of Facebook users") has little power of discrimination. - -Now try this: - -```plpgsql -select - (select min(cmnty_symptoms_pct) as "Min" from covidcast_fb_survey_results), - (select max(cmnty_symptoms_pct) as "Max" from covidcast_fb_survey_results); -``` - -The results here are about _7%_ for the minimum and about _55%_ for the maximum. This indicates that the signal "_cmnty_symptoms_pct"_ (characterized on the [COVIDcast download page](../ingest-the-covidcast-data/inspect-the-csv-files/) by "Percentage of people who know someone in their local community with COVID-like symptoms, based on surveys of Facebook users") will have a reasonable power of discrimination. - -None of the YSQL built-in aggregate functions can take account of the _"stderr"_ or _"sample_size"_ values that were carried forward into the final _"covidcast_fb_survey_results"_ table. But you might like to try some _ad hoc_ queries to get an idea of the variability and reliability of the data. - -For example, this: - -```plpgsql -select avg(cmnty_symptoms_stderr) from covidcast_fb_survey_results; -``` - -gives a result of about _0.8_ for the percentage values in the range _7%_ through _55%_. 
This suggests that the seven-day moving averages are reasonably reliable. - -And this: - -```plpgsql -select - (select min(cmnty_symptoms_sample_size) as "Min" from covidcast_fb_survey_results), - (select max(cmnty_symptoms_sample_size) as "Max" from covidcast_fb_survey_results); -``` - -results in about _325_ for the minimum and about _24.6_ thousand for the maximum. This is a rather troublesomely wide range. The result of this query: - -```plpgsql -select - round(avg(cmnty_symptoms_sample_size)) as "Avg", - state -from covidcast_fb_survey_results -group by state -order by 1; -``` - -suggests that the sample size is probably correlated with the state's population. For example, the two biggest sample size values are from California and Texas, and the two smallest are from DC and Wyoming. It would be straightforward to find a list of recent values for state populations from the Internet and to join these, using state, into a table together with the average sample sizes from the query above. You could then use the [`regr_r2()`](../../function-syntax-semantics/linear-regression/regr/#regr-r2) function to see how well-correlated the size of a state's response to the COVIDcast Facebook survey is to its population. This is left as an exercise for the reader. - -Create a view to focus your attention on the values that the analysis presented in the remainder of this section uses: - -```plpgsql -create or replace view covidcast_fb_survey_results_v as -select - survey_date, - state, - mask_wearing_pct, - cmnty_symptoms_pct as symptoms_pct -from covidcast_fb_survey_results; -``` - -This is included in the [`analysis-queries.sql`](./analysis-scripts/analysis-queries-sql/) script that also implements all of the queries that the analysis presented in the remainder of this section uses. - -If you want to see how the results come out when you use the _"symptoms_pct"_ column instead of the _"cmnty_symptoms_pct"_ column, just redefine the view, thus: - -```plpgsql -create or replace view covidcast_fb_survey_results_v as -select - survey_date, - state, - mask_wearing_pct, - symptoms_pct as symptoms_pct -from covidcast_fb_survey_results; -``` - -## How the rest of this analysis section is organized - -- The section [Daily values for regr_r2(), regr_slope(), regr_intercept() for symptoms vs mask-wearing](./daily-regression-analysis/) describes the actual linear regression analysis code. - -- The section [Select the data for COVID-like symptoms vs mask-wearing by state scatter plot](./symptoms-vs-mask-wearing-by-state/) shows the SQL that lists out the _51_ individual _"(symptoms_pct, mask_wearing_pct)"_ tuples for the day that was arbitrarily chosen for drawing a scatter-plot on top of which the outcome of the regression analysis for that day is drawn.
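To give a flavor of the daily analysis that the first of these two sections describes, here is a minimal sketch of the kind of query that it builds on. It assumes only the _covidcast_fb_survey_results_v_ view created above; the actual analysis code in [`analysis-queries.sql`](./analysis-scripts/analysis-queries-sql/) is more elaborate.

```plpgsql
-- Sketch: "R-squared" for symptoms versus mask-wearing, computed day by day.
select
  survey_date,
  round(regr_r2(symptoms_pct, mask_wearing_pct)::numeric, 2) as "R-squared"
from covidcast_fb_survey_results_v
group by survey_date
order by survey_date;
```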
diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/analysis-scripts/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/analysis-scripts/_index.md deleted file mode 100644 index e9d628e02f60..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/analyze-the-covidcast-data/analysis-scripts/_index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: SQL scripts for analyzing the COVIDcast data -linkTitle: SQL scripts -headerTitle: SQL scripts for performing linear regression analysis on the COVIDcast data -description: SQL scripts for performing linear regression analysis on COVIDcast data -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: analysis-scripts - parent: analyze-the-covidcast-data - weight: 100 -type: indexpage ---- - -Here are the `.sql` scripts that jointly implement the analysis: - -- [`analysis-queries.sql`](./analysis-queries-sql) executes queries on the actual COVIDcast data. - -- [`synthetic-data.sql`](./synthetic-data-sql) generates the data that are used to create the plot described in the section [Scatter-plot for synthetic data](../scatter-plot-for-2020-10-21/#scatter-plot-for-synthetic-data). diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/_index.md deleted file mode 100644 index 72bf39ba8159..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/_index.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Ingest the COVIDcast data into YugabyteDB -linkTitle: Ingest the COVIDcast data -headerTitle: Ingesting, checking, and combining the COVIDcast data -description: Ingest the COVIDcast data, check it for format compliance, and combine it all into the single "covidcast_fb_survey_results" table -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: ingest-the-covidcast-data - parent: covid-data-case-study - weight: 20 -type: indexpage ---- - -## Ingest the .csv files, check the assumptions, and combine the interesting values into a single table - -Here are the steps: - -- [Manually inspect the .csv files](./inspect-the-csv-files). - -- [Copy the data from each `.csv` file "as is" into a dedicated staging table, with effective primary key _"(state, survey_date)"_](./stage-the-csv-files). (The qualifier "effective" recognizes the fact that, as yet, these columns will have different names that reflect how they're named in the `.csv` files.) - -- [Check that the values from the `.csv` files do indeed conform to the stated rules](./check-data-conforms-to-the-rules). - -- [Project the columns of interest from the staging tables and join these into a single table](./join-the-staged-data/), with primary key _"(state, survey_date)"_ for analysis. - -All of these steps are implemented by the [`ingest-the-data.sql`](./ingest-scripts/ingest-the-data-sql/) script. It's designed so that you can run, and re-run, it time and again. It will always finish silently (provided that you say `set client_min_messages = warning;`) each time that you run it. It calls various other scripts.
You will download these, along with [`ingest-the-data.sql`](./ingest-scripts/ingest-the-data-sql/), as you step through the sections in the order that the left-hand navigation menu presents. diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/_index.md deleted file mode 100644 index f00fc0ccf9a9..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/covid-data-case-study/ingest-the-covidcast-data/ingest-scripts/_index.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: SQL scripts for ingesting the COVIDcast data -linkTitle: SQL scripts -headerTitle: SQL scripts for ingesting the COVIDcast data -description: SQL scripts for ingesting COVIDcast data -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: ingest-scripts - parent: ingest-the-covidcast-data - weight: 100 -type: indexpage ---- -Here are the `.sql` scripts that jointly implement the whole ingestion flow: creating staging tables; copying in the data from the `.csv` files; checking that the data in the staging tables conforms to the expected rules; and projecting the relevant columns and joining them into a single _"covidcast_fb_survey_results"_ table. - -- [`ingest-the-data.sql`](./ingest-the-data-sql) is the master script for this purpose. It contains some explicit SQL statements and it invokes other files that, for example, create procedures that the master script calls. Save it to the _"covid-data-case-study"_ directory that you created for this case study. You can't run it yet because the files that it needs aren't yet in place. You will step through each manually and then save it. When you've done this once, and saved all the files, you will then be able to run, and re-run, the whole ingestion process with a single "button press". This will be useful if you decide, later, to download more recent COVIDcast data. - - Here are the scripts that the master script relies on: - -- [`cr-cr-staging-tables.sql`](./cr-cr-staging-tables-sql) - -- [`cr-cr-copy-from-csv-scripts.sql`](./cr-cr-copy-from-csv-scripts-sql) - -- [`cr-assert-assumptions-ok.sql`](./cr-assert-assumptions-ok-sql) - -- [`cr-xform-to-covidcast-fb-survey-results.sql`](./cr-xform-to-joined-table-sql) diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/function-syntax-semantics/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/function-syntax-semantics/_index.md deleted file mode 100644 index 44838e43dd07..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/function-syntax-semantics/_index.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: YSQL aggregate functions signature and purpose -linkTitle: Per function signature and purpose -headerTitle: Signature and purpose of each aggregate function -description: This section summarizes the signature and purpose of each of the YSQL aggregate functions and links to their individual accounts. 
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: aggregate-function-syntax-semantics - parent: aggregate-functions - weight: 90 -aliases: - - /preview/api/ysql/exprs/aggregate_functions -type: indexpage -showRightNav: true ---- - -The aggregate functions are categorized into four classes: - -- [general-purpose aggregate functions](#general-purpose-aggregate-functions) - -- [statistical aggregate functions](#statistical-aggregate-functions) - -- [_within-group ordered-set_ aggregate functions](#within-group-ordered-set-aggregate-functions) - -- [_within-group hypothetical-set_ aggregate functions](#within-group-hypothetical-set-aggregate-functions) - -## General-purpose aggregate functions - -The aggregate functions in this class can be invoked in one of two ways: - -- _Either_ "ordinarily" on all the rows in a table or in connection with `GROUP BY`, when they return a single value for the set of rows. - - In this use, row ordering often doesn't matter. For example, [`avg()`](./avg-count-max-min-sum/#avg) has this property. Sometimes, row ordering _does_ matter. For example, the order of grouped values determines the mapping between array index and array value with [`array_agg()`](./array-string-jsonb-jsonb-object-agg/#array-agg). - -- _Or_ as a [window function](../../window_functions/) with `OVER`. - - In this use, where the aggregate function is evaluated for each row in the [window](../../window_functions/invocation-syntax-semantics/#the-window-definition-rule), ordering always matters. - -Arguably, `avg()` might be better classified as a statistical aggregate function. (It is often used together with `stddev()`.) But, because it is so very commonly used, and used as a canonical example of the _"aggregate function"_ notion, it is classified here as a general-purpose aggregate function. - -| Function | Description | -| ---- | ---- | -| [`array_agg()`](./array-string-jsonb-jsonb-object-agg/#array-agg) | Returns an array whose elements are the individual values that are aggregated. This is described in full detail in the [`array_agg()`](../../../datatypes/type_array/functions-operators/array-agg-unnest/#array-agg) subsection in the main [Array](../../../datatypes/type_array/) section. | -| [`avg()`](./avg-count-max-min-sum/#avg) | Computes the arithmetic mean of a set of summable values by adding them all together and dividing by the number of values. If the set contains nulls, then these are simply ignored—both when computing the sum and when counting the number of values. | -| [`bit_and()`](./bit-and-or-bool-and-or/#bit-and) | Returns a value that represents the outcome of applying the two-by-two matrix `AND` rule to each aligned set of bits for the set of `NOT NULL` input values. | -| [`bit_or()`](./bit-and-or-bool-and-or#bit-or) | Returns a value that represents the outcome of applying the two-by-two matrix `OR` rule to each aligned set of bits for the set of `NOT NULL` input values. | -| [`bool_and()`](./bit-and-or-bool-and-or/#bool-and) | Returns a value that represents the outcome of applying the two-by-two matrix `AND` rule to the set of `NOT NULL` input boolean values. | -| [`bool_or()`](./bit-and-or-bool-and-or/#bool-or) | Returns a value that represents the outcome of applying the two-by-two matrix `OR` rule to the set of `NOT NULL` input boolean values. | -| [`count()`](./avg-count-max-min-sum/#count) | Counts the number of non-null values in a set. The data type of the values is of no consequence.
| -| `every()` | `every()` is a synonym for [`bool_and()`](./bit-and-or-bool-and-or/#bool-and) | -| [`jsonb_agg()`](./array-string-jsonb-jsonb-object-agg/#jsonb-agg) | This, and `json_agg()` are described in detail the [`jsonb_agg()`](../../../datatypes/type_json/functions-operators/jsonb-agg/) section in the main [JSON](../../../datatypes/type_json/) section. | -| [`jsonb_object_agg()`](./array-string-jsonb-jsonb-object-agg/#jsonb-object-agg) | This and `json_object_agg()` are described in detail the [`jsonb_object_agg()`](../../../datatypes/type_json/functions-operators/jsonb-object-agg/) section in the main [JSON](../../../datatypes/type_json/) section. | -| [`max()`](./avg-count-max-min-sum/#max-min) | Computes the greatest value among the values in the set using the rule that is used for the particular data type in the ORDER BY clause. nulls are removed before sorting the values. | -| [`min()`](./avg-count-max-min-sum/#max-min) | Computes the least value among the values in the set using the rule that is used for the particular data type in the ORDER BY clause. nulls are removed before sorting the values. | -| [`string_agg()`](./array-string-jsonb-jsonb-object-agg/#string-agg) | Returns a single value produced by concatenating the aggregated values (first argument) separated by a mandatory separator (second argument). The first overload has `text` inputs and returns `text`. The second overload has `bytea` inputs and returns `bytea`. | -| [`sum()`](./avg-count-max-min-sum/#sum) | Computes the sum of a set of summable values by adding them all together. If the set contains nulls, then these are simply ignored. | -| [`xmlagg()`](https://www.postgresql.org/docs/15/functions-aggregate.html) | This is not supported through Version YB 2.2. See [GitHub Issue #1043](https://github.com/yugabyte/yugabyte-db/issues/1043) | - -## Statistical aggregate functions - -The aggregate functions in this class can be invoked in one of two ways: - -- _Either_ "ordinarily" on all the rows in a table or in connection with `GROUP BY`, when they return a single value for the set of rows. In this use, row ordering doesn't matter. - -- _Or_ as a [window function](../../window_functions/) with `OVER`. - - In this use, where the aggregate function is evaluated for each row in the [window](../../window_functions/invocation-syntax-semantics/#the-window-definition-rule), ordering always matters. - -| Function | Description | -| ---- | ---- | -| [`covar_pop()`](./linear-regression/covar-corr/#covar-pop-covar-samp) | Returns the so-called covariance, taking the available values to be the entire population. | -| [`covar_samp()`](./linear-regression/covar-corr/#covar-pop-covar-samp) | Returns the so-called covariance, taking the available values to be a sample of the population. | -| [`corr()`](./linear-regression/covar-corr/#corr) | Returns the so-called correlation coefficient. This measures the extent to which the _"y"_ values are linearly related to the _"x"_ values. A return value of 1.0 indicates perfect correlation. | -| [`regr_avgy()`](./linear-regression/regr/#regr-avgy-regr-avgx) | Returns the average of the first argument for those rows where both arguments are `NOT NULL`. | -| [`regr_avgx()`](./linear-regression/regr/#regr-avgy) | Returns the average of the second argument for those rows where both arguments are `NOT NULL`. | -| [`regr_count()`](./linear-regression/regr/#regr-count) | Returns the number of rows where both arguments are `NOT NULL`. 
| -| [`regr_slope()`](./linear-regression/regr/#regr-slope-regr-intercept) | Returns the slope of the straight line that linear regression analysis has determined best fits the "(y, x)" pairs. | -| [`regr_intercept()`](./linear-regression/regr/#regr-slope-regr-intercept) | Returns the point at which the straight line that linear regression analysis has determined best fits the "(y, x)" pairs intercepts the "y"-axis. | -| [`regr_r2()`](./linear-regression/regr/#regr-r2) | Returns the square of the correlation coefficient, [`corr()`](./linear-regression/covar-corr/#corr). | -| [`regr_syy()`](./linear-regression/regr/#regr-syy-regr-sxx-regr-sxy) | Returns `regr_count(y, x)*var_pop(y)` for `NOT NULL` pairs. | -| [`regr_sxx()`](./linear-regression/regr/#regr-syy-regr-sxx-regr-sxy) | Returns `regr_count(y, x)*var_pop(x)` for `NOT NULL pairs`. | -| [`regr_sxy()`](./linear-regression/regr/#regr-syy-regr-sxx-regr-sxy) | Returns `regr_count(y, x)*covar_pop(y, x)` for `NOT NULL` pairs. | -| [`variance()`](./variance-stddev/#variance) | The semantics of `variance()` and [`var_samp()`](./variance-stddev/#var-samp) are identical. | -| [`var_pop()`](./variance-stddev/#var-pop) | Returns the variance of a set of values using the "population" variant of the formula that divides by _N_, the number of values. | -| [`var_samp()`](./variance-stddev/#var-samp) | Returns the variance of a set of values using the "sample" variant of the formula that divides by _(N - 1)_ where _N_ is the number of values. | -| [`stddev()`](./variance-stddev/#stddev) | The semantics of `stddev()` and [`stddev_samp()`](./variance-stddev/#stddev-samp) are identical. | -| [`stddev_pop()`](./variance-stddev/#stddev-pop) | Returns the standard deviation of a set of values using the naïve formula (i.e. the "population" variant) that divides by the number of values, _N_. | -| [`stddev_samp()`](./variance-stddev/#stddev-samp) | Returns the standard deviation of a set of values using the "sample" variant of the formula that divides by _(N - 1)_ where _N_ is the number of values. | - -## Within-group ordered-set aggregate functions - -These functions are sometimes referred to as “inverse distribution” functions. They can be invoked only with the dedicated `WITHIN GROUP (ORDER BY ...)` syntax. They cannot be invoked as a [window function](../../window_functions/) with `OVER`. - -| Function | Description | -| ---- | ---- | -| [`mode()`](./mode-percentile-disc-percentile-cont/#mode) | Return the most frequent value of "sort expression". If there's more than one equally-frequent value, then one of these is silently chosen arbitrarily. | -| [`percentile_disc()`](./mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont) | Discrete variant. The scalar overload of `percentile_disc()` takes a percentile rank value as input and returns the value, within the specified window, that would produce this. The array overload takes an array of percentile rank values as input and returns the array of values that would produce these. | -| [`percentile_cont()`](./mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont) | Continuous variant. The scalar overload of `percentile_cont()` takes a percentile rank value as input and returns the value, within the specified window, that would produce this. The array overload takes an array of percentile rank values as input and returns the array of values that would produce these. 
| - -## Within-group hypothetical-set aggregate functions - -These functions are invoked, as within-group hypothetical-set aggregate functions, with the dedicated `WITHIN GROUP (ORDER BY ...)` syntax. Further, each of the functions listed in this class is associated with a [window function](../../window_functions/) of the same name. (But not every window function can be invoked in this way.) For each, the result is the value that the associated window function would have returned for the “hypothetical” row constructed, as the invocation syntax specifies, as if such a row had been added to the [window](../../window_functions/invocation-syntax-semantics/#the-window-definition-rule). - -| Function | Description | -| ---- | ---- | -| [`rank()`](./rank-dense-rank-percent-rank-cume-dist/#rank) | Returns the integer ordinal rank of each row according to the emergent order that the window `ORDER BY` clause specifies. The series of values starts with 1 but, when the window contains ties, the series is not dense. See the account of [rank()](../../window_functions/function-syntax-semantics/row-number-rank-dense-rank/#rank) in the [Window functions](../../window_functions/) section for more information. | -| [`dense_rank()`](./rank-dense-rank-percent-rank-cume-dist/#dense-rank) | Returns the integer ordinal rank of the distinct value of each row according to what the window `ORDER BY` clause specifies. The series of values starts with 1 and, even when the window contains ties, the series is dense. See the account of [dense_rank()](../../window_functions/function-syntax-semantics/row-number-rank-dense-rank/#dense-rank) in the [Window functions](../../window_functions/) section for more information. | -| [`percent_rank()`](./rank-dense-rank-percent-rank-cume-dist/#percent-rank) | Returns the percentile rank of each row within the window, with respect to the argument of the window_definition's window `ORDER BY` clause. See the account of [percent_rank()](../../window_functions/function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) in the [Window functions](../../window_functions/) section for more information. | -| [`cume_dist()`](./rank-dense-rank-percent-rank-cume-dist/#cume-dist) | Returns a value that represents the number of rows with values less than or equal to the current row’s value divided by the total number of rows—in other words, the relative position of a value in a set of values. See the account of [cume_dist()](../../window_functions/function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) in the [Window functions](../../window_functions/) section for more information.
| diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/function-syntax-semantics/linear-regression/_index.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/function-syntax-semantics/linear-regression/_index.md deleted file mode 100644 index e0e6b7eef1dc..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/function-syntax-semantics/linear-regression/_index.md +++ /dev/null @@ -1,176 +0,0 @@ ---- -title: covar_pop(), covar_samp(), corr(), regr_%() -linkTitle: linear regression -headerTitle: Functions for linear regression analysis -description: Describes the functionality of the covar_pop(), covar_samp(), corr(), and regr_%() family of YSQL aggregate functions for linear regression analysis -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: linear-regression - parent: aggregate-function-syntax-semantics - weight: 50 -type: indexpage -showRightNav: true ---- - -This parent section and its two child sections describe these aggregate functions for linear regression analysis: - -- [`covar_pop()`](./covar-corr/#covar-pop-covar-samp), [`covar_samp()`](./covar-corr/#covar-pop-covar-samp), [`corr()`](./covar-corr/#corr) -- [`regr_avgy()`](./regr/#regr-avgy-regr-avgx), [`regr_avgx()`](./regr/#regr-avgy-regr-avgx), [`regr_count()`](./regr/#regr-count), [`regr_slope()`](./regr/#regr-slope-regr-intercept), [`regr_intercept()`](./regr/#regr-slope-regr-intercept), [`regr_r2()`](./regr/#regr-r2), [`regr_syy()`](./regr/#regr-syy-regr-sxx-regr-sxy)[`regr_sxx()`](./regr/#regr-syy-regr-sxx-regr-sxy), [`regr_sxy()`](./regr/#regr-syy-regr-sxx-regr-sxy). - -## Overview - -See, for example, this Wikipedia article on [Regression analysis](https://en.wikipedia.org/wiki/Regression_analysis). Briefly, linear regression analysis estimates the relationship between a dependent variable and an independent variable, aiming to find the line that most closely fits the data. This is why each of the functions described has two input formal parameters. The _dependent variable_, the first formal parameter, is conventionally designated by _"y"_; and the _independent variable_, the second formal parameter, is conventionally designated by _"x"_. - -See, for example, the article ["How To Interpret R-squared in Regression Analysis"](https://statisticsbyjim.com/regression/interpret-r-squared-regression/). It says this: - -> Linear regression identifies the equation that produces the smallest difference between all of the observed values and their [fitted values](https://statisticsbyjim.com/glossary/fitted-values/). To be precise, linear regression finds the smallest sum of squared [residuals](https://statisticsbyjim.com/glossary/residuals/) that is possible for the dataset. - -In terms of the high school equation for a straight line: - -``` -y = m*x + c -``` - -the function [`regr_slope(y, x)`](./regr/#regr-slope-regr-intercept) estimates the gradient, _"m"_, of the straight line that best fits the set of coordinate pairs over which the aggregation is done; and the function [`regr_intercept(y, x)`](./regr/#regr-slope-regr-intercept) estimates its intercept with the y-axis, _"c"_. The so-called "R-squared " measure, implemented by [`regr_r2(y, x)`](./regr/#regr-r2), indicates the goodness-of-fit. It measures the percentage of the variance in the dependent variable that the independent variables explain collectively—in other words, the strength of the relationship between your model and the dependent variable on a 0 – 100% scale. 
For example, if `regr_r2()` returns a value of _0.7_, it means that seventy percent of the relationship between the putative dependent variable and the independent variable can be explained by a straight line with the gradient and intercept returned, respectively, by `regr_slope()` and `regr_intercept()`. The remaining thirty percent can be attributed to stochastic variation. - -The purpose of each of the functions is rather specialized; but the domain is also very familiar to people who need to do linear regression. For this reason, the aim here is simply to explain enough for specialists to be able to understand exactly what is available, and how to invoke what they decide that they need. Each function is illustrated with a simple example. - -Each of these aggregate functions is invoked by using the same syntax: - -- _either_ the simple syntax, `select aggregate_fn(expr, expr) from t` -- _or_ the `GROUP BY` syntax -- _or_ the `OVER` syntax - -Only the simple invocation is illustrated. See, for example, the sections [`GROUP BY` syntax](../avg-count-max-min-sum/#group-by-syntax) and [`OVER` syntax](../avg-count-max-min-sum/#over-syntax) in the section [`avg(), count(), max(), min(), sum()`](../avg-count-max-min-sum/) for how to use these syntax patterns. - -**Signature:** - -Each one of the aggregate functions for linear regression analysis, except for `regr_count()`, has the same signature: - -``` -input value: double precision, double precision - -return value: double precision -``` - -Because it returns a count, `regr_count()` returns a `bigint`, thus: - -``` -input value: double precision, double precision - -return value: bigint -``` -In all cases, the first input parameter represents the values that you want to be taken as the _dependent variable_ (conventionally denoted by _"y"_) and the second input parameter represents the values that you want to be taken as the _independent variable_ (conventionally denoted by _"x"_). - -{{< note title="About nullness" >}} -If, for a particular input row, _either_ the expression for _"y"_, _or_ the expression for _"x"_, evaluates to `null`, then that row is implicitly filtered out. -{{< /note >}} - -## Create the test table - -The same test table recipe serves for illustrating all of the functions for linear regression analysis. The design is straightforward. Noise is added to a pure linear function, thus: - -```output - y = slope*x + intercept + delta -``` - -where _"delta"_ is picked, for each _"x"_ value from a pseudorandom normal distribution with specified mean and standard deviation. - -The procedure _"populate_t()"_ lets you try different values for _"slope"_, _"intercept"_, and for the size and variability of _"delta"_. It uses the function `normal_rand()`, brought by the [tablefunc](../../../../../../explore/ysql-language-features/pg-extensions/extension-tablefunc) extension. 
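-If `normal_rand()` isn't yet available in your database, you would typically first install the extension. This one-time step is shown here as a reminder; it isn't part of the original script:
-
-```plpgsql
-create extension if not exists tablefunc;
-```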
- -```plpgsql -drop procedure if exists populate_t( - int, double precision, double precision, double precision, double precision) - cascade; -drop table if exists t cascade; - -create table t( - k int primary key, - x double precision, - y double precision, - delta double precision); - -create procedure populate_t( - no_of_rows in int, - slope in double precision, - intercept in double precision, - mean in double precision, - stddev in double precision) - language plpgsql -as $body$ -begin - delete from t; - - with - a1 as ( - select - s.v as k, - s.v as x, - (s.v * slope) + intercept as y - from generate_series(1, no_of_rows) as s(v)), - - a2 as ( - select ( - row_number() over()) as k, - r.v as delta - from normal_rand(no_of_rows, mean, stddev) as r(v)) - - insert into t(k, x, y, delta) - select - k, x, a1.y, a2.delta - from a1 inner join a2 using(k); - - insert into t(k, x, y, delta) values - (no_of_rows + 1, 0, null, null), - (no_of_rows + 2, null, 0, null); -end; -$body$; - -\set no_of_rows 100 -call populate_t( - no_of_rows => :no_of_rows, - - mean => 0.0, - stddev => 20.0, - slope => 5.0, - intercept => 3.0); - -\pset null -with a as( - select k, x, y, delta from t where x between 1 and 5 - union all - select k, x, y, delta from t where k between 96 and (:no_of_rows + 2)) -select - to_char(x, '990.9') as x, - to_char(y, '990.9') as y, - to_char((y + delta), '990.9999') as "y + delta" -from a -order by k; -``` - -Here is an impression of the result of invoking _"populate_t()"_ with the values shown. The whitespace has been manually added. - -``` - x | y | y + delta ---------+--------+----------- - 1.0 | 8.0 | -5.9595 - 2.0 | 13.0 | -14.8400 - 3.0 | 18.0 | 40.4009 - 4.0 | 23.0 | 27.8537 - 5.0 | 28.0 | 68.7411 - - 96.0 | 483.0 | 483.9196 - 97.0 | 488.0 | 464.3205 - 98.0 | 493.0 | 528.2446 - 99.0 | 498.0 | 514.0421 - 100.0 | 503.0 | 549.7692 - 0.0 | | - | 0.0 | -``` - -The individual functions are described in these two child-sections - -- [`covar_pop()`, `covar_samp()`, `corr()`](./covar-corr/) - -- [`regr_%()`](./regr/) diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/functionality-overview.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/functionality-overview.md deleted file mode 100644 index 40faf85d8123..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/functionality-overview.md +++ /dev/null @@ -1,464 +0,0 @@ ---- -title: Aggregate function invocation -linkTitle: Informal functionality overview -headerTitle: Informal overview of aggregate function invocation -description: This section provides an informal introduction to the invocation of aggregate functions. -menu: - preview_api: - identifier: aggregate-functions-functionality-overview - parent: aggregate-functions - weight: 10 -type: docs ---- - -Aggregate functions fall into two kinds according to the syntax that you use to invoke them. - -## Ordinary aggregate functions - -All of the functions listed in the two tables [General-purpose aggregate functions](../function-syntax-semantics/#general-purpose-aggregate-functions) and [Statistical aggregate functions -](../function-syntax-semantics/#statistical-aggregate-functions) are of this kind. Aggregate functions of this kind can be invoked in one of two ways: - -- _Either_ "ordinarily" on all the rows in a table or in connection with `GROUP BY`, when they return a single value for the set of rows. - - In this use, row ordering often doesn't matter. 
For example, [`avg()`](../function-syntax-semantics/avg-count-max-min-sum/#avg) has this property. Sometimes, row ordering _does_ matter. For example, the order of grouped values determines the mapping between array index and array value with [`array_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#array-agg). - -- _Or_ as a [window function](../../window_functions/) with `OVER`. - - In this use, where the aggregate function is evaluated for each row in the [window](../../window_functions/invocation-syntax-semantics/#the-window-definition-rule), ordering always matters. - -### Ordinary invocation - -First create and populate a test table: - -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - class int not null, - n numeric not null, - s text not null); - -insert into t(k, class, n, s) -select - v, - ntile(2) over (order by v), - (7 + v*0.1), - chr(ascii('a') + v - 1) -from generate_series(1, 10) as g(v); - -select class, to_char(n, '0.99') as n, s from t order by class, n; -``` - -This is the result: - -``` - class | n | s --------+-------+--- - 1 | 7.10 | a - 1 | 7.20 | b - 1 | 7.30 | c - 1 | 7.40 | d - 1 | 7.50 | e - 2 | 7.60 | f - 2 | 7.70 | g - 2 | 7.80 | h - 2 | 7.90 | i -``` - -Now demonstrate the ordinary invocation of the aggregate functions [`count()`](../function-syntax-semantics/avg-count-max-min-sum/#count) and [`avg()`](../function-syntax-semantics/avg-count-max-min-sum/#avg): - -```plpgsql -select - count(n) as count, - to_char(avg(n), '0.99') as avg -from t; -``` -This is the result: - -``` - count | avg --------+------- - 10 | 7.55 -``` -Next, add a `GROUP BY` clause: - -```plpgsql -select - class, - count(n) as count, - to_char(avg(n), '0.99') as avg -from t -group by class -order by class; -``` - -This is the result: - -``` - class | count | avg --------+-------+------- - 1 | 5 | 7.30 - 2 | 5 | 7.80 -``` - -Next demonstrate the use of the `FILTER` syntax as part of the `SELECT` list invocation syntax of `avg()` and the `ORDER BY` syntax as part of the `SELECT` list invocation syntax of [`string_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#string-agg): - -```plpgsql -select - count(n) as count, - to_char(avg(n) filter (where k%3 = 0), '0.99') as avg, - string_agg(s, '-' order by k desc) as s -from t; -``` - -This is the result: - -``` - count | avg | s --------+-------+--------------------- - 10 | 7.60 | j-i-h-g-f-e-d-c-b-a -``` - -### Invoking an ordinary aggregate function as a window function - -Every ordinary aggregate function can be invoked, also, as a window function. - -See also the section [Informal overview of window function invocation using the OVER clause](../../window_functions/functionality-overview/). This section also has examples of invoking an ordinary aggregate function as a window function. 
- -Try this: - -```plpgsql -with a as ( - select - class, - count(n) over w1 as count, - avg(n) over w2 as avg, - string_agg(s, '-') over w1 as s - from t - window - w1 as (partition by class order by k), - w2 as (order by k groups between 2 preceding and 2 following)) -select class, count, to_char(avg, '0.99') as avg, s -from a; -``` - -This is the result: - -``` - class | count | avg | s --------+-------+-------+----------- - 1 | 1 | 7.20 | a - 1 | 2 | 7.25 | a-b - 1 | 3 | 7.30 | a-b-c - 1 | 4 | 7.40 | a-b-c-d - 1 | 5 | 7.50 | a-b-c-d-e - 2 | 1 | 7.60 | f - 2 | 2 | 7.70 | f-g - 2 | 3 | 7.80 | f-g-h - 2 | 4 | 7.85 | f-g-h-i - 2 | 5 | 7.90 | f-g-h-i-j -``` - -Notice that the effect of the omission of the [frame clause](../../window_functions/invocation-syntax-semantics/#the-frame-clause-1) in the definition of _"w1"_ for the invocation of `count()` and `string_agg()` is to ask to use the rows from the start of the window through the current row. - -Notice, too, that the effect of `groups between 2 preceding and 2 following` in the definition of _"w2"_ for the invocation of `avg()` is to compute the moving average within a window of two values below and two values above the present value. - -The rules for the [window_definition rule](../../window_functions/invocation-syntax-semantics/#the-window-definition-rule)—and in particular the effect of omitting the so-called [frame clause](../../window_functions/invocation-syntax-semantics/#the-frame-clause-1)—are explained in the section [Window function invocation—SQL syntax and semantics](../../window_functions/invocation-syntax-semantics/). - -## Within-group aggregate functions - -This kind has two sub-kinds: - -- [within-group ordered-set aggregate functions](#within-group-ordered-set-aggregate-functions) - -- [within-group hypothetical-set aggregate functions](#within-group-hypothetical-set-aggregate-functions) - -The invocation syntax is the same for the functions in both subgroups. But the semantic proposition is different. - -### Within-group ordered-set aggregate functions - -There are only three aggregate functions of this sub-kind: [`mode()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#mode), [`percentile_disc()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont), and [`percentile_cont()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont). - -The `mode()` function is chosen to illustrate the "within-group ordered-set" syntax here because its meaning is the easiest of the three to understand. It simply returns the most frequent value of the ordering expression used in this syntax: - -``` -within group (order by ) -``` - -If there's more than one equally-frequent value, then one of these is silently chosen arbitrarily. - -First create and populate a test table. It's convenient to use the same table and population that the `mode()` section uses in the subsection [Example that uses GROUP BY](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#example-that-uses-group-by). The code is copied here for your convenience. The data is contrived so that the value _"v = 37"_ occurs twice for _"class = 1"_ and so that the value _"v = 42"_ occurs twice for _"class = 2"_. Otherwise each distinct value of _"v"_ occurs just once. 
- -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - class int not null, - v int not null); - -insert into t(k, class, v) -select - s.v, - 1, - case s.v between 5 and 6 - when true then 37 - else s.v - end -from generate_series(1, 10) as s(v) -union all -select - s.v, - 2, - case s.v between 15 and 17 - when true then 42 - else s.v - end -from generate_series(11, 20) as s(v); -``` - -Now list out the biggest three counts for each distinct value of _"v"_ for each of the two values of _"class": - -```plpgsql -select 1 as class, v, count(*) "frequency" -from t -where class = 1 -group by v -order by count(*) desc, v -limit 3; - -select 2 as class, v, count(*) "frequency" -from t -where class = 2 -group by v -order by count(*) desc, v -limit 3; -``` - -These are the results: - -``` - class | v | frequency --------+----+----------- - 1 | 37 | 2 - 1 | 1 | 1 - 1 | 2 | 1 - - class | v | frequency --------+----+----------- - 2 | 42 | 3 - 2 | 11 | 1 - 2 | 12 | 1 -``` - -Here's how to invoke the `mode()` within-group ordered-set aggregate function: - -```plpgsql -select - class, - mode() within group (order by v) as "most frequent v" -from t -group by class -order by class; -``` - -Here is the result: - -``` - class | most frequent v --------+----------------- - 1 | 37 - 2 | 42 -``` - -### Within-group hypothetical-set aggregate functions - -There are four functions of this sub-kind: [`rank()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#rank), [`dense_rank()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#dense-rank), [`percent_rank()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#percent-rank), and [`cume_dist()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#cume-dist). See the section [Within-group hypothetical-set aggregate functions](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/) for more information. - -The same functions can also be invoked as window functions. That use is described here: - -- [`rank()`](../../window_functions/function-syntax-semantics/row-number-rank-dense-rank/#rank) -- [`dense_rank()`](../../window_functions/function-syntax-semantics/row-number-rank-dense-rank/#dense-rank) -- [`percent_rank()`](../../window_functions/function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) -- [`cume_dist()`](../../window_functions/function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) - -The basic semantic definition of each function is the same in each invocation scenario. But the goals of the two invocation methods are critically different. The window function invocation method produces the value prescribed by the function's definition for each extant row. And the within-group hypothetical-set invocation method produces the value that the row whose relevant values are specified in the invocation _would_ produce if such a row were actually (rather than hypothetically) to be inserted. - -First create and populate a test table. It's convenient to use the same table and population that's used in the subsection [Semantics demonstration](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#semantics-demonstration) in the "Within-group hypothetical-set aggregate functions" section. The code is copied here for your convenience. 
- -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - class int not null, - score int); - -insert into t(k, class, score) -with a as ( - select s.v from generate_series(1, 10) as s(v)) -values(0, 1, null::int) -union all -select - v, - ntile(2) over (order by v), - case v <= 5 - when true then v*2 - else (v - 5)*2 - end -from a; - -\pset null -select class, score -from t -order by class, score nulls first; -``` - -This is the result: - -``` - class | score --------+-------- - 1 | - 1 | 2 - 1 | 4 - 1 | 6 - 1 | 8 - 1 | 10 - 2 | 2 - 2 | 4 - 2 | 6 - 2 | 8 - 2 | 10 -``` - -Next, create a view defined by a `SELECT` statement that invokes the `rank()` function as a window function: - -```plpgsql -create or replace view v as -select - k, - class, - score, - (rank() over (partition by class order by score nulls first)) as r -from t; -``` - -Visualize the results that the view defines: - -```plpgsql -select class, score, r -from v -order by class, r; -``` - -This is the result: - -``` - class | score | r --------+--------+--- - 1 | | 1 - 1 | 2 | 2 - 1 | 4 | 3 - 1 | 6 | 4 - 1 | 8 | 5 - 1 | 10 | 6 - 2 | 2 | 1 - 2 | 4 | 2 - 2 | 6 | 3 - 2 | 8 | 4 - 2 | 10 | 5 -``` - -Now, simulate the hypothetical insert of two rows, one in each class, and visualize the values that `rank()` produces for these. Do this within a transaction that you rollback. - -```plpgsql -start transaction; -insert into t(k, class, score) values (21, 1, 5), (22, 2, 6); - -select class, score, r -from v -where k in (21, 22) -order by class, r; - -rollback; -``` - -This is the result: - -``` - class | score | r --------+-------+--- - 1 | 5 | 4 - 2 | 6 | 3 -``` - -Now, mimic the two hypothetical inserts. Notice that the text of the `SELECT` statement is identical for the case where _"score"_ is set to _5_ and _"class"_ is set to _1_ and the case where _"score"_ is set to _6_ and _"class"_ is set to _2_. - -```plpgsql -\set score 5 -\set class 1 -select - :class as class, - :score as score, - rank(:score) within group (order by score nulls first) as r -from t -where class = :class; - -\set score 6 -\set class 2 -select - :class as class, - :score as score, - rank(:score) within group (order by score nulls first) as r -from t -where class = :class; -``` - -These are the results: - -``` - class | score | r --------+-------+--- - 1 | 5 | 4 - - class | score | r --------+-------+--- - 2 | 6 | 3 -``` - -Notice that they are the same as were seen inside the _"start transaction;... rollback;"_ code above. - -Now try the two within-group hypothetical-set invocations without the restriction `where class = :class` but instead with `GROUP BY class`: - -```plpgsql -\set score 5 -select - class, - :score as score, - rank(:score) within group (order by score nulls first) as r -from t -group by class; - -\set score 6 -select - class, - :score as score, - rank(:score) within group (order by score nulls first) as r -from t -group by class; -``` - -This is the result: - -``` - class | score | r --------+-------+--- - 1 | 5 | 4 - 2 | 5 | 3 - - class | score | r --------+-------+--- - 1 | 6 | 4 - 2 | 6 | 3 -``` - -Notice that values were produced, for each value in turn of the hypothetical _"score"_, for _every_ currently existing value of _"class"_. This corresponds to what would bee seen, in the simulated insert within the rolled back transaction, if each chosen value of _"score"_ were inserted once for each currently existing value of the `GROUP BY` column _"class"_. 
diff --git a/docs/content/preview/api/ysql/exprs/aggregate_functions/invocation-syntax-semantics.md b/docs/content/preview/api/ysql/exprs/aggregate_functions/invocation-syntax-semantics.md deleted file mode 100644 index 2786a9ba22de..000000000000 --- a/docs/content/preview/api/ysql/exprs/aggregate_functions/invocation-syntax-semantics.md +++ /dev/null @@ -1,400 +0,0 @@ ---- -title: Aggregate function syntax and semantics -linkTitle: Invocation syntax and semantics -headerTitle: Aggregate function invocation—SQL syntax and semantics -description: This section specifies the syntax and semantics of aggregate function invocation. -menu: - preview_api: - identifier: aggregate-functions-invocation-syntax-semantics - parent: aggregate-functions - weight: 20 -type: docs ---- - -## Syntax - -### Reproduced from the SELECT statement section - -The following six diagrams, [`select_start`](../../../syntax_resources/grammar_diagrams/#select-start), [`ordinary_aggregate_fn_invocation`](../../../syntax_resources/grammar_diagrams/#ordinary-aggregate-fn-invocation), [`within_group_aggregate_fn_invocation`](../../../syntax_resources/grammar_diagrams/#within-group-aggregate-fn-invocation), [`group_by_clause`](../../../syntax_resources/grammar_diagrams/#group-by-clause), [`grouping_element`](../../../syntax_resources/grammar_diagrams/#grouping-element), and [`having_clause`](../../../syntax_resources/grammar_diagrams/#having-clause) are reproduced from the section that describes the [`SELECT` statement](../../../the-sql-language/statements/dml_select/). - -{{%ebnf%}} - select_start, - ordinary_aggregate_fn_invocation, - within_group_aggregate_fn_invocation -{{%/ebnf%}} -These rules govern the invocation of aggregate functions as `SELECT` list items. - -The aggregate functions listed in the sections [General-purpose aggregate functions](../function-syntax-semantics/#general-purpose-aggregate-functions) and [Statistical aggregate functions](../function-syntax-semantics/#statistical-aggregate-functions) are governed by the `ordinary_aggregate_fn_invocation` rule. These functions may also be invoked as window functions. See the account of the `fn_over_window` rule, and everything else that qualifies this, in the section [Window function invocation—SQL syntax and semantics](../../window_functions/invocation-syntax-semantics/). - -The aggregate functions listed in the sections [Within-group ordered-set aggregate functions](../function-syntax-semantics/#within-group-ordered-set-aggregate-functions) and [Within-group hypothetical-set aggregate functions](../function-syntax-semantics/#within-group-hypothetical-set-aggregate-functions) are governed by the `within_group_aggregate_fn_invocation` rule. "Within-group ordered-set" aggregate functions may _not_ be invoked as window functions. But "within-group hypothetical-set" aggregate functions _may_ be invoked as window functions. The reasons for this difference are explained in the two relevant dedicated sections. - -When aggregate functions are invoked using the syntax specified by either the `ordinary_aggregate_fn_invocation` rule or the `within_group_aggregate_fn_invocation` rule, users very often determine the result set with the `GROUP BY` clause. 
- -{{%ebnf%}} - group_by_clause, - grouping_element -{{%/ebnf%}} - -The result set may be restricted by the `HAVING` clause: - -{{%ebnf%}} - having_clause -{{%/ebnf%}} - -## Semantics - -### The ordinary_aggregate_fn_invocation rule - -This syntax rule governs the invocation of the aggregate functions that are listed in the [General-purpose aggregate functions](../function-syntax-semantics/#general-purpose-aggregate-functions) and the [Statistical aggregate functions](../function-syntax-semantics/#statistical-aggregate-functions) sections. Notice that (possibly to your surprise) the optional `ORDER BY` clause is used _within_ the parentheses that surround the arguments with which the function is invoked and that there is no comma after the final argument and this clause. Here is an example: - -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - class int not null, - v text not null); - -insert into t(k, class, v) -select - (1 + s.v), - case (s.v) < 3 - when true then 1 - else 2 - end, - chr(97 + s.v) -from generate_series(0, 5) as s(v); - -select - class, - array_agg(v order by k desc) as "array_agg(v)", - string_agg(v, ' ~ ' order by k desc) as "string_agg(v)", - jsonb_agg(v order by v desc) as "jsonb_agg", - jsonb_object_agg(v, k order by v desc) as "jsonb_object_agg(v, k)" -from t -group by class -order by class; -``` -It produces this result: - -``` - class | array_agg(v) | string_agg(v) | jsonb_agg | jsonb_object_agg(v, k) --------+--------------+---------------+-----------------+-------------------------- - 1 | {c,b,a} | c ~ b ~ a | ["c", "b", "a"] | {"a": 1, "b": 2, "c": 3} - 2 | {f,e,d} | f ~ e ~ d | ["f", "e", "d"] | {"d": 4, "e": 5, "f": 6} -``` - -This is a simplified version of the example shown in the [`GROUP BY` syntax](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#group-by-syntax) section within the [`array_agg()`, `string_agg()`, `jsonb_agg()`, `jsonb_object_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/) section. These three functions: - -- [`array_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#array-agg), [`string_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#string-agg), [`jsonb_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#jsonb-agg) - -are sensitive to the effect of the order of aggregation of the individual values. This is because they produce lists. However, [`jsonb_object_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#jsonb-object-agg) is _not_ sensitive to the order because the key-value pairs in a JSON object are defined to have no order. And neither is any other aggregate function among those that are governed by the `ordinary_aggregate_fn_invocation` sensitive to ordering. 
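-As a quick, hedged illustration of that point (this query is an extra sketch, not part of the original example set), you can aggregate the same table _"t"_ created just above in opposite orders; only the list-producing functions change their result, while `jsonb_object_agg()` does not:
-
-```plpgsql
--- The boolean column is true because a JSON object has no defined key order.
-select
-  array_agg(v order by k)      as "array_agg asc",
-  array_agg(v order by k desc) as "array_agg desc",
-  (jsonb_object_agg(v, k order by k) = jsonb_object_agg(v, k order by k desc))
-                               as "jsonb_object_agg order-insensitive"
-from t;
-```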
- -The [`string_agg()`](../function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#string-agg) function conveniently illustrates the effect of the `FILTER` clause: - -```plpgsql -select - string_agg(v, ' ~ ' order by k ) filter (where v <> 'f') as "string_agg(v) without f", - string_agg(v, ' ~ ' order by k desc) filter (where v <> 'a') as "string_agg(v) without a" -from t; -``` -This is the result: - -``` - string_agg(v) without f | string_agg(v) without a --------------------------+------------------------- - a ~ b ~ c ~ d ~ e | f ~ e ~ d ~ c ~ b -``` - -### The within_group_aggregate_fn_invocation rule - -This syntax rule governs the invocation of the aggregate functions that are listed in the [Within-group ordered-set aggregate functions](../function-syntax-semantics/#within-group-ordered-set-aggregate-functions) section and the [Within-group hypothetical-set aggregate functions](../function-syntax-semantics/#within-group-hypothetical-set-aggregate-functions) section. - -The [`mode()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#mode) function is a "within-group ordered-set" aggregate function. Here's a simple example: - -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - class int not null, - v text); - -insert into t(k, class, v) -select - g.v, - ntile(2) over(order by v), - chr(ascii('a') -1 + g.v) -from generate_series(1, 10) as g(v) -union all -values - (11, 1, 'e'), - (12, 2, 'f'), - (13, 2, null), - (14, 2, null), - (15, 2, null); - -\pset null -select k, class, v from t order by class, v nulls last, k; -``` - -This is the result: - -``` - k | class | v -----+-------+-------- - 1 | 1 | a - 2 | 1 | b - 3 | 1 | c - 4 | 1 | d - 5 | 1 | e - 11 | 1 | e - 6 | 2 | f - 12 | 2 | f - 7 | 2 | g - 8 | 2 | h - 9 | 2 | i - 10 | 2 | j - 13 | 2 | - 14 | 2 | - 15 | 2 | -``` - -Now try this: - -```plpgsql -select - class, - mode() within group (order by k desc) as "k mode", - mode() within group (order by v ) as "v mode" -from t -group by class -order by class; -``` - -This is the result: - -``` - class | k mode | v mode --------+--------+-------- - 1 | 11 | e - 2 | 15 | f -``` - -Because _"k"_ happens to be unique, the modal value is chosen arbitrarily from the set of candidate values. It might appear that the `ORDER BY` clause determines which value is chosen. Don't rely on this—it's an undocumented effect of the implementation and might change at some future release boundary. - -Notice that the expression whose modal value is computed for each value of _"class"_, as the `GROUP BY` clause requests, is specified not as the argument of the [`mode()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#mode) function but, rather, as the argument of the invocation's `ORDER BY` clause. This explains why the `within_group_aggregate_fn_invocation` rule specifies that `ORDER BY` is mandatory. If you execute the `\df mode` meta-command in `ysqlsh`, you'll see that both the argument data type and the result data type are `anyelement`. In other words, the argument of the `ORDER BY` clause in the invocation of the [`mode()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#mode) aggregate function must be just a single scalar expression. Notice that this is more restrictive than the general case for the `ORDER BY` clause that you use at top level in a subquery or within the window definition for the `OVER` clause that you use to invoke a window function.
- -The expression need not correspond just to a bare column, as this example shows: - -```plpgsql -select - mode() within group (order by v||'x') as "expr-1 mode", - mode() within group (order by (case v is null when true then '' else v end)) as "expr-2 mode" -from t; -``` - -This is the result: - -``` - expr-1 mode | expr-2 mode --------------+------------- - ex | -``` - -The parameterization story for the other two "within-group ordered-set" aggregate functions, [`percentile_disc()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont) and [`percentile_cont()`](../function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont), is more subtle. Each has two overloads. One takes a scalar, and the other takes an array. These arguments specify _how_ the functions should determine their result. The expression, for which the result is produced, is specified as the argument of the `ORDER BY` clause. - -The syntax rules for the four [within-group hypothetical-set](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/) aggregate functions, [`rank()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#rank), [`dense_rank()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#dense-rank), [`percent_rank()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#percent-rank), and [`cume_dist()`](../function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#cume-dist), are, as stated, the same as for the [within-group ordered-set](../function-syntax-semantics/mode-percentile-disc-percentile-cont/) aggregate functions. But the semantics are importantly different—and this difference is reflected in how the invocations are parameterized. This is best understood by reading the accounts of the four functions and the general introduction to the section that describes these. Briefly, the argument to the function specifies the value that is to be hypothetically inserted. And the `ORDER BY` argument specifies the expression to which that value will be assigned as a result of the hypothetical insert. - -### The GROUP BY clause - -The `group_by_clause` rule, together with the `grouping_element` rule, show that the `GROUP BY` clause can be composed as a comma-separated list of an unlimited number of terms, each of which can be chosen from a list of five kinds of element. Moreover, the `GROUPING SETS` alternative itself takes a comma-separated list of an unlimited number of terms, each of which can be chosen from the same list of five kinds of element. Further, this freedom can be exercised recursively. Here's an exotic example to illustrate this freedom of composition: - -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - g1 int not null, - g2 int not null, - g3 int not null, - g4 int not null, - v int not null); - -insert into t(k, g1, g2, g3, g4, v) -select - g.v, - g.v%2, - g.v%4, - g.v%8, - g.v%16, - g.v*100 -from generate_series(1, 80) as g(v); - -select count(*) as "number of resulting rows" from ( - select g1, g2, g3, g4, avg(v) - from t - group by (), g1, (g2, g3), rollup (g1, g2), cube (g3, g4), grouping sets (g1, g2, (), rollup (g1, g3), cube (g2, g4)) - order by g1 nulls last, g2 nulls last) -as a; -``` - -This is the result: - -``` - number of resulting rows --------------------------- - 1536 -``` - -You can, of course, remove the surrounding `select count(*)... from... 
as a;` from this: - -``` -select count(*) as "number of resulting rows" from ( - select ...) -as a; -``` - -and look at all _1,536_ resulting rows. But it's very unlikely that you'll be able to discern any meaning from what you see. Here are two more legal examples whose meaning is obscured by the way they're written: - -```plpgsql -select avg(v) -from t -group by (); -``` - -and - -```plpgsql -select g1, avg(v) -from t -group by (), g1; -``` - -The meaning of each of the last three constructs of the five that the `grouping_element` rule allows is explained in the section [Using the `GROUPING SETS`, `ROLLUP`, and `CUBE` syntax for aggregate function invocation](../grouping-sets-rollup-cube/). - -The second construct is the familiar bare list of `GROUP BY` expressions. This may be surrounded by parentheses, and arbitrary sequences of expressions may themselves be surrounded by arbitrary numbers of arbitrarily deeply nested parentheses pairs. However, doing this brings no meaning—just as it brings no meaning in this contrived, but legal, example: - -```plpgsql -select (((((1 + 2)))) + (((((3 + (4))))))) as x; -``` -It produces the answer _10_. - -The first construct, the empty `()` pair has no semantic value except when it's used within, for example, the `ROLLUP` argument. - -The overwhelmingly common way to take advantage of the freedoms that the `grouping_element` rule allows is to use exactly one of the last four constructs and to take advantage of the empty `()` pair in that context. - -### The HAVING clause - -The `HAVING` clause is functionally equivalent to the `WHERE` clause. However, it is legal only in a subquery that has a `GROUP BY` clause, and it must be placed after the `GROUP BY`. First, create and populate a test table: - -```plpgsql -drop table if exists t cascade; -create table t( - k int primary key, - class int not null, - v numeric); - -insert into t(k, class, v) -select - (1 + s.v), - case (s.v) < 5 - when true then 1 - else 2 - end, - case (s.v) <> 4 - when true then (100.0 + s.v)::numeric - else null - end -from generate_series(0, 9) as s(v); - -\pset null -select k, class, v from t order by k; -``` - -This is the result: - -``` - k | class | v -----+-------+-------- - 1 | 1 | 100 - 2 | 1 | 101 - 3 | 1 | 102 - 4 | 1 | 103 - 5 | 1 | - 6 | 2 | 105 - 7 | 2 | 106 - 8 | 2 | 107 - 9 | 2 | 108 - 10 | 2 | 109 -``` - -Now try this counter-example: - -```plpgsql -select v from t having v >= 105; -``` - -It causes this error: - -``` -42803: column "t.v" must appear in the GROUP BY clause or be used in an aggregate function -``` - -The meaning is "...must be used in an expression in the `GROUP BY` clause or be used in an expression in an aggregate function invocation". - -Here is an example of the legal use of the `HAVING` clause: - -```plpgsql -select class, count(v) -from t -group by class -having count(v) > 4 -order by class; -``` - -This is the result: - -``` - class | count --------+------- - 2 | 5 -``` - -This illustrates the use case that motivates the `HAVING` clause: you want to restrict the results using a predicate that references an aggregate function. Try this counter-example: - -```plpgsql -select class, count(v) -from t -where count(v) > 4 -group by class -order by class; -``` - -It causes this error: - -``` -42803: aggregate functions are not allowed in WHERE -``` - -(The error code _42803_ maps to the exception name `grouping_error` in PL/pgSQL.) 
- -In contrast, this is legal: - -```plpgsql -select class, count(v) -from t -where class = 1 -group by class -order by class; -``` - -The `WHERE` clause restricts the set on which aggregate functions are evaluated. And the `HAVING` clause restricts the result set _after_ aggregation. This informs you that a subquery that uses a `HAVING` clause legally can always be re-written to use a `WHERE` clause, albeit at the cost of increased verbosity, to restrict the result set of a subquery defined in a `WITH` clause, like this: - -```plpgsql -with a as ( - select class, count(v) - from t - group by class) -select class, count -from a -where count > 4 -order by class; -``` diff --git a/docs/content/preview/api/ysql/exprs/geo_partitioning_helper_functions/_index.md b/docs/content/preview/api/ysql/exprs/geo_partitioning_helper_functions/_index.md deleted file mode 100644 index 24f824689696..000000000000 --- a/docs/content/preview/api/ysql/exprs/geo_partitioning_helper_functions/_index.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: Geo-partitioning helper functions -headerTitle: Geo-partitioning helper functions -linkTitle: Geo-partitioning helper functions -description: This section contains all the helper functions for geo-distribution. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: geo-partitioning-helper-functions - parent: api-ysql-exprs - weight: 40 -type: indexpage ---- - -## Synopsis - -The following functions are helpful for [Row-level geo-partitioning](../../../../explore/multi-region-deployments/row-level-geo-partitioning/), as they make it easier to insert rows from a user's server and select rows from the local partition. - -| Function | Return Type |Description | -|-----------|------------|-------------| -| [yb_is_local_table(oid)](func_yb_is_local_table) | boolean | Returns whether the given 'oid' is a table replicated only in the local region | -| [yb_server_region()](func_yb_server_region) | varchar | Returns the region of the currently connected node | -| [yb_server_zone()](func_yb_server_zone) | varchar | Returns the zone of the currently connected node | -| [yb_server_cloud()](func_yb_server_cloud) | varchar | Returns the cloud provider of the currently connected node | diff --git a/docs/content/preview/api/ysql/exprs/sequence_functions/_index.md b/docs/content/preview/api/ysql/exprs/sequence_functions/_index.md deleted file mode 100644 index dce08d92b222..000000000000 --- a/docs/content/preview/api/ysql/exprs/sequence_functions/_index.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Sequence functions [YSQL] -headerTitle: Sequence functions -linkTitle: Sequence functions -description: Functions operationg on sequences -menu: - preview_api: - identifier: sequence-functions - parent: api-ysql-exprs - weight: 50 -type: indexpage ---- - -Sequences are special database objects designed to generate unique numeric identifiers. They are commonly used to create auto-incrementing primary keys for tables but can also serve other use cases requiring unique, sequential numbers. - -A sequence is an independent object in the database that operates outside the scope of a table. It ensures thread-safe, concurrent access to increment or fetch the next value, making it highly reliable in multi-user environments. - -Some of the key features of sequences are as follows - -- Highly efficient for generating unique numbers. -- Customizable with options like starting value, increment, and cycling behavior. -- Supports multiple sequences in a single database. 
-- Works seamlessly with concurrent database operations. - -YugabyteDB provides several functions to interact with sequences, allowing for precise control over their behavior and values: - -1. [nextval](func_nextval/) - Fetches the next value in the sequence. -1. [currval](func_currval/) - Retrieves the current value of the sequence for the session. -1. [setval](func_setval/) - Sets the sequence to a specific value. -1. [lastval](func_lastval/) - Retrieves the most recent value fetched by nextval in the current session. - -These functions enable seamless integration of sequences into your database logic, whether for automatic ID generation or custom sequence management. diff --git a/docs/content/preview/api/ysql/exprs/window_functions/_index.md b/docs/content/preview/api/ysql/exprs/window_functions/_index.md deleted file mode 100644 index d47e543202da..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/_index.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: YSQL window functions -linkTitle: Window functions -headerTitle: Window functions -description: This section covers the syntax and semantics for all supported window functions in YSQL. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: window-functions - parent: api-ysql-exprs - weight: 60 -aliases: - - /preview/api/ysql/exprs/window_functions -type: indexpage -showRightNav: true ---- -If you are already familiar with window functions, then you can skip straight to the [syntax and semantics](./invocation-syntax-semantics/) section or the section that lists all of [the YSQL window functions](./function-syntax-semantics/) and that links, in turn, to the definitive account of each function. - -This page has only the [Synopsis](./#synopsis) section and the [Organization of the window functions documentation](./#organization-of-the-window-functions-documentation) section. - -**Note:** Some relational database systems use the term _analytic function_ for what YSQL and PostgreSQL call a _window function_. - -{{< note title="About the code examples" >}} -Each of the code examples uses one of the four tables _"t1"_, _"t2"_, _"t3"_, or _"t4"_. And none of them does any inserts, updates, or deletes. It will speed up your use of these examples if you install all of the tables in one sitting. Start, therefore, with the section [The data sets used by the code examples](./function-syntax-semantics/data-sets/). Then carry on with your study of the rest of this major section. -{{< /note >}} - -## Synopsis - -A window function operates, in general, on each of the set of [_windows_](./invocation-syntax-semantics/#the-window-definition-rule) into which the rows of its input row set are divided, and it produces one value for every row in each of the [_windows_](./invocation-syntax-semantics/#the-window-definition-rule). A [_window_](./invocation-syntax-semantics/#the-window-definition-rule) is defined by having the same values for a set of one or more classifier columns. In the limit, there will be just a single [_window_](./invocation-syntax-semantics/#the-window-definition-rule). In general, the output value of a window function for a particular row takes into account values from all of the rows in the successive [_windows_](./invocation-syntax-semantics/#the-window-definition-rule) on which it operates. - -A window function can be invoked only in a `SELECT` list item in a subquery; and specific syntax, using the `OVER` clause, is essential.
The argument of this clause is the [`window_definition`](../../syntax_resources/grammar_diagrams/#window-definition) . The authoritative account is given in the section [Window function invocation—SQL syntax and semantics](./invocation-syntax-semantics). Here is a brief overview. A window definition can have up to three clauses, thus: - -- The `PARTITION BY` clause. This specifies the rule that divides the input row set into two or more [_windows_](./invocation-syntax-semantics/#the-window-definition-rule). If it's omitted, then the input row set is treated as a single [_window_](./invocation-syntax-semantics/#the-window-definition-rule). -- The window `ORDER BY` clause. This specifies the rule that determines how the rows within each [_window_](./invocation-syntax-semantics/#the-window-definition-rule) are ordered. If it's omitted, then the ordering is unpredictable—and therefore the output of the window function is meaningless. -- The [`frame_clause`](../../syntax_resources/grammar_diagrams/#frame-clause). This is significant for only some of the eleven window functions. It determines which rows within each [_window_](./invocation-syntax-semantics/#the-window-definition-rule) are taken as the input for the window function. For example, you can use it to exclude the current row; or you can say that the function should look only at the rows in the [_window_](./invocation-syntax-semantics/#the-window-definition-rule) from the start through the current row. - -Examples of each of these clauses are shown in the section [Informal overview of function invocation using the OVER clause](./functionality-overview). - -Window functions are similar to aggregate functions in this way: - -- Each operates on each of possibly many [_windows_](./invocation-syntax-semantics/#the-window-definition-rule) of a row set. - -**Note:** As mentioned above, the row set for a window function can be defined _only_ by using the `PARTITION BY` clause within a [`window_definition`](../../syntax_resources/grammar_diagrams/#window-definition). The row set for an aggregate function _may_ be defined in this way. But it may alternatively be defined by the regular `GROUP BY` clause at the syntax spot in a subquery that follows the `WHERE` clause's spot. - -Window functions differ from aggregate functions in this way: - -- A window function produces, in general, a different output value for _each different input row_ in the [_window_](./invocation-syntax-semantics/#the-window-definition-rule). -- An aggregate function, when it's invoked in the same way as a window function, often produces the _same value_ for _each different input row_ in the [_window_](./invocation-syntax-semantics/#the-window-definition-rule). (The exact behavior depends on what the [frame clause](./invocation-syntax-semantics/#the-frame-clause-1) specifies.) -- When an aggregate function is invoked using the regular `GROUP BY` clause, it produces a _single value_ for each whole [_window_](./invocation-syntax-semantics/#the-window-definition-rule) that the `GROUP BY` clause defines. - -## Organization of the window functions documentation - -The remaining pages are organized as follows: - -### Informal overview of function invocation using the OVER clause - -**[Here](./functionality-overview/)**. Skip this section entirely if you are already familiar with window functions. It presents five code examples. This section focuses on the _effect_ that each illustrated function has. 
It leaves their formal definitions to the [invocation syntax and semantics](./invocation-syntax-semantics/) section. - -### Window function invocation—SQL syntax and semantics - -**[Here](./invocation-syntax-semantics/)**. This section presents the formal treatment of the syntax and semantics of how a window function, or an aggregate function, is invoked as a special kind of `SELECT` list item in conjunction with the `OVER` keyword. - -### Signature and purpose of each window function - -**[Here](./function-syntax-semantics/)**. The following list groups the eleven window functions in the same way that the sidebar items group them. The rationale for the grouping is explained in the referenced sections. - -      [`row_number()`](./function-syntax-semantics/row-number-rank-dense-rank/#row-number)
-      [`rank()`](./function-syntax-semantics/row-number-rank-dense-rank/#rank)
-      [`dense_rank()`](./function-syntax-semantics/row-number-rank-dense-rank/#dense-rank) - -      [`percent_rank()`](./function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank)
-      [`cume_dist()`](./function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist)
-      [`ntile()`](./function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile) - -      [`first_value()`](./function-syntax-semantics/first-value-nth-value-last-value/#first-value)
-      [`nth_value()`](./function-syntax-semantics/first-value-nth-value-last-value/#nth-value)
-      [`last_value()`](./function-syntax-semantics/first-value-nth-value-last-value/#last-value) - -      [`lag()`](./function-syntax-semantics/lag-lead/#lag)
-      [`lead()`](./function-syntax-semantics/lag-lead/#lead) - -      [The data sets used by the code examples](./function-syntax-semantics/data-sets/) - -### Analyzing a normal distribution with percent_rank(), cume_dist() and ntile() - -**[Here](./analyzing-a-normal-distribution/)**. Regard this section as an optional extra. It answers an interesting question: - -- If you want to allocate a row set into buckets where each contains the same number of rows, based on your ordering rule, is there any difference between the result produced by using, in turn, each of the three functions `percent_rank()`, `cume_dist()`, or `ntile()`? - -The answer is, of course, "Yes". But it's a qualified "Yes" because when certain conditions hold, there is _no_ difference. The value that the study brings is due to the fact that it aims to meet a high-level goal rather than to demonstrate the basic use of low-level primitives. This means that it necessarily combines the basic use of the window functions under study with all sorts of other generic SQL techniques (and even stored procedure techniques) because these are needed to meet the goal. This kind of bigger-picture use typically goes hand-in-hand with any serious use of window functions. diff --git a/docs/content/preview/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/_index.md b/docs/content/preview/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/_index.md deleted file mode 100644 index 0a79e5363f91..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/_index.md +++ /dev/null @@ -1,303 +0,0 @@ ---- -title: > - Case study: compare percent_rank(), cume_dist(), and ntile() on a normal distribution -linkTitle: > - Case study: analyzing a normal distribution -headerTitle: > - Case study: analyzing a normal distribution with percent_rank(), cume_dist(), and ntile() -description: Case study to compare and contrast the window functions percent_rank(), cume_dist(), and ntile() on large sets of normally distributed values. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: analyzing-a-normal-distribution - parent: window-functions - weight: 40 -type: indexpage -showRightNav: true ---- - -## Introduction - -This section describes an empirical approach to answering the following question: - -- If you want to allocate a row set into buckets where each contains the same number of rows, based on your ordering rule, is there any difference between the result produced by using, in turn, each of the three functions [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank), [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist), or [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile)? - -The answer is, of course, "Yes"—why else would the three functions all be supported? The [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile) function is specified exactly to meet the stated goal. From the dedicated [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile) section: - -> **Purpose:** Return an integer value for each row that maps it to a corresponding percentile. For example, if you wanted to mark the boundaries between the highest-ranking 20% of rows, the next-ranking 20% of rows, and so on, then you would use `ntile(5)`. 
The top 20% of rows would be marked with _1_, the next-to-top 20% of rows would be marked with _2_, and so on so that the bottom 20% of rows would be marked with _5_. - -The other two functions implement more fine grained measures. Here's what the [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) documentation says: - -> **Purpose:** Return the percentile rank of each row within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule), with respect to the argument of the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition)'s window `ORDER BY` clause. The value _p_ returned by `percent_rank()` is a number in the range _0 <= p <= 1_. It is calculated like this: -``` -percentile_rank = (rank - 1) / ("no. of rows in window" - 1) -``` -And here's what the [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) documentation says: - -> **Purpose:** Return a value that represents the number of rows with values less than or equal to the current row’s value divided by the total number of rows—in other words, the relative position of a value in a set of values. The graph of all values of `cume_dist()` within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) is known as the cumulative distribution of the argument of the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition)'s window `ORDER BY` clause. The value _c_ returned by `cume_dist()` is a number in the range _0 < c <= 1_. It is calculated like this: -``` -cume_dist() = - "no of rows with a value <= the current row's value" / - "no. of rows in window" -``` -The algorithm that the [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) uses is different from the one that [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) uses. And the algorithm that `ntile()` uses to produce its bucket allocations directly is unspecified. - -However, the answer "Yes" to the question "is there any difference between the result produced by each of the three functions?" is a qualified "Yes" because when certain conditions hold, there is no difference. - -The pedagogic value of this empirical study is brought by the fact that it aims to meet the stated high-level goal rather than to demonstrate the basic use of low-level primitives. This means that it necessarily combines the basic use of the window functions under study with all sorts of other generic SQL techniques (and even stored procedure techniques) because these are needed to meet the goal. This kind of bigger-picture use typically goes hand-in-hand with any serious use of window functions. - -Here is the problem statement at the next level of detail: - -- A row set with _N_ rows has only unique values of the column list that the OVER clause specifies for ordering the rows in the [_window_](../invocation-syntax-semantics/#the-window-definition-rule). (In other words, there are no ties.) -- The aim is to allocate the rows into _n_ buckets that each have the same number of rows. -- _N_ is an integral multiple of _n_. - -The study shows that if these special qualifications are met, then the bucket allocations are identical. It shows, too, that if either of those special qualifications isn't met (in other words, either if there are ties, or if _N_ is not an integral multiple of _n_), then the bucket allocations produced by each approach differ. 
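The following minimal sketch is not part of the case study's code kit; it uses only `generate_series()` rather than table _"t4"_, and it simply shows the three functions side by side on a toy row set that meets the special qualifications above: 20 unique values, no ties, and 4 requested buckets. Deriving a bucket number from the _"pr"_ and _"cd"_ values is exactly the allocation problem that the case study goes on to address.

```plpgsql
-- A self-contained illustration (not part of the case study's code kit).
-- 20 unique values, no ties, and 4 requested buckets, so N is an integral
-- multiple of n.
with scores as (
  select g.v as score
  from generate_series(1, 20) as g(v))
select
  score,
  ntile(4)       over w as ntile_bucket,
  percent_rank() over w as pr,
  cume_dist()    over w as cd
from scores
window w as (order by score)
order by score;
```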
- -## How to make best use of the code examples -Before starting, create the data set that the code relies on using the `ysqlsh` script that [table t4](../function-syntax-semantics/data-sets/table-t4/) presents. It takes only a few seconds to run, and then you can simply leave _"t4"_ untouched as you run, and re-run, the code examples. Notice that the table is populated by values that are created by a generator that picks, pseudorandomly, from an ideal normal distribution. This means that the values in table _"t4"_ will be different each time that you drop and re-create it. The code examples will therefore always show the same patterns. And the invariants that it illustrates will hold reliably. But the details will change with each new re-creation of _"t4"_. Your experience will be best if, as you step through the code examples, you run them all against fixed content in table _"t4"_. - -The steps that are described in the next section should all be run at the `ysqlsh` prompt. They do a mixture of things: - -- They use `.sql` scripts to drop and re-create schema objects of these kinds: - - views through which to access the `double precision` column, and then the `int` column, in table _"t4"_ through a uniform interface, so that the same-spelled queries can run in turn against each of these two columns - - tables to hold results from different queries so that later queries can compare these - - views through which to populate the separate results tables for the `double precision` column, and then the `int` column, through a uniform interface, so that the same-spelled reports can run in turn against each of these two columns - - PL/pgSQL procedures to populate the results tables and PL/pgSQL functions to encapsulate prepared queries. -- They use `.sql` scripts to run various queries, and execute various anonymous PL/pgSQL blocks (also known as `DO` blocks) that are too small to warrant encapsulation in functions and are best encapsulated in dedicated `.sql` scripts, -- And they invoke the functions and the procedures, and execute the `.sql` scripts, using small anonymous scripts that are simply presented in line in the account. - -It's best to create a dedicated user for the purpose of running this code that owns no other objects. - -Each of the `.sql` scripts is presented on a dedicated page. (You can see all of these in the order in which they are to be run in the navigation bar.) Each page starts with a sentence like this: - -> Save this script as `.sql`. - -Before starting, you should create a directory (it doesn't matter what you call it) on which to save all of these scripts. Be sure to save each successive file as instructed. The names are critical because the last script, [`do_demo.sql`](./do-demo/), invokes each of these in the proper order. - -When you go through everything manually, you'll appreciate what each step does, and see the result (when it does more than create a schema object) immediately. That way, you'll be able to compare your results with the typical results that the following section presents and appreciate how the results vary in inessential ways each time that table _"t4"_ is dropped and re-created. - -The directory you create to hold the saved scripts must have a subdirectory called `reports`. This is where the master script, [`do_demo.sql`](./do-demo/), spools its output. The set of scripts are written so that they can simply be run time and again without reporting any errors. 
This, together with the fact that the master script spools all results, means that it will finish silently. - -When you've got this far, you can save the spooled files to the side and then run the script to drop, re-create, and re-populate table _"t4"_. Then you can _diff_ the old results with the new results to see what changes and what stays the same. You could also change the number of rows with which _"t4"_ is populated so that it is _not_ an integral multiple of the number of buckets that you request for the allocations. (You can change this number too, of course.) Then you'll see how dramatically the results change. - -## Step through the code - -### Step ZERO - -Drop and re-create the results tables using [`do_clean_start.sql`](./do-clean-start/). - -### Step ONE - -To confirm that the outcome is sensible, first create the table function `show_t4()` with [this script](./cr-show-t4/). It reports some useful overall measures of _"t4"_. Then execute it like this: -```plpgsql -select t as "Some useful overall measures of t4." -from show_t4(); -``` -Because of the pseudorandom behavior, the actual values of the mean and standard deviation will change with each successive re-population of _"t4"_. These results are typical: -``` - Some useful overall measures of t4. ------------------------------------------- - count(*) 100000 - - avg(%score) 52.4 - stddev(%score) 11.6 -``` -Now get a sense of how similar the values returned by [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) are if (and only if) the values that the window `ORDER BY` uses are unique in the [_window_](../invocation-syntax-semantics/#the-window-definition-rule). - -- Use [`cr_dp_views.sql`](./cr-dp-views/) to create a view to present _"t4.dp_score"_ as _"score"_. -- Use [`cr_pr_cd_equality_report.sql`](./cr-pr-cd-equality-report) to create the function `pr_cd_equality_report()`. The name means 'compare the extent to which _"pr"_ (the value produced by [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank)) is similar to _"cd"_ (the value produced by [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist))' for each of the _"score"_ values. The values are compared as a ratio (expressed as a percent). The reporting function is parameterized by the absolute difference, _"delta_threshold"_, that this ratio might have from 100%. And it reports the count, the maximum score, and the maximum ratio over the scores where _"delta"_ is greater than _"delta_threshold"_. - -Run the function with four different values for _"delta_threshold"_ like this: - -```plpgsql -select * from pr_cd_equality_report(0.50); -select * from pr_cd_equality_report(0.10); -select * from pr_cd_equality_report(0.05); -select * from pr_cd_equality_report(0.01); -``` -Here is a summary of the results: -``` - count(*) | max_score | max_ratio -----------+-----------+----------- - 199 | 19.19 | 99.50 - 990 | 25.53 | 99.90 - 1960 | 28.55 | 99.95 - 9090 | 36.99 | 99.99 -``` -The results show that: - -- once you have passed the lowest _10,000_ or so values (that is about _10%_ of the total _100,000_ number of rows), the ratio of the two measures is never more than _0.01%_ away from _100%_.
- -- and once you have passed the lowest _200_ or so values (that is about _0.2%_ of the total _100,000_ number of rows), the ratio of the two measures is never more than _0.5%_ away from _100%_. - -In other words, the two measures give remarkably similar answers over most of the high end of the range of values. - -Now repeat the measurement for the values in the _"t4.int_score"_ column—where each possible value has about _1,000_ duplicates. Use [`cr_int_views.sql`](./cr-int-views/) to do this. And then repeat the four function executions. Now the results are very different: - -``` - count(*) | max_score | max_ratio -----------+-----------+----------- - 97717 | 75.00 | 99.46 - 99541 | 82.00 | 99.86 - 99782 | 85.00 | 99.95 - 99972 | 91.00 | 99.99 -``` -This shouldn't be surprising. Statisticians who use these functions will know when their analysis needs one, or the other, of [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist). - -### Step TWO - -Create a function to visualize the data as a histogram. This relies on allocating the values in the column _"t4.dp_score"_ into equal-width buckets. The same allocation scheme will later be needed for the output values of [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist). This turns out to be a somewhat challenging problem. It is discussed in the section [The bucket allocation scheme](./bucket-allocation). - -Now that you've understood the challenge, create the `bucket()` function using either [`cr_bucket_using_width_bucket.sql`](./cr-bucket-using-width-bucket/) or [`cr_bucket_dedicated_code.sql`](./cr-bucket-dedicated-code/)—it doesn't matter which because they behave identically. - -Test that the behavior is correct with [`do_assert_bucket_ok`](./do-assert-bucket-ok/). - -Next, create the `language sql` function that creates the histogram output, [`cr_histogram.sql`](./cr-histogram/). Notice that this is functionally equivalent to this - -``` -prepare do_histogram( - int, -- No. of buckets - numeric -- Scale factor for histogram height -) as -select ... ; -``` -but it simply uses a different header -``` -create or replace function do_histogram( - no_of_buckets in int, - scale_factor in numeric) - returns SETOF text - language sql -as $body$ -``` -to encapsulate the identical SQL text. But it has the advantage that it can be done once when the application's database artifacts are installed rather than at the start of every session by each session that needs it. It also has the additional benefit that you can use named formal parameters to make the SQL more readable. - -Now generate the histogram like this: -```plpgsql -select * from histogram(50, 100); -``` -You can see typical results here: [Output from running `histogram()` on _"t4.dp_score"_](./reports/histogram-report/). - -### Step THREE - -- Compare the bucket allocation produced by [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile), [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank), and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) acting on the `double precision` column _"dp_score"_.
- -Create the following three functions: - -- `do_ntile()` using [`cr_do_ntile.sql`](./cr-do-ntile/) -- `do_percent_rank()` using [`cr_do_percent_rank.sql`](./cr-do-percent-rank/) -- `do_cume_dist()` using [`cr_do_cume_dist.sql`](./cr-do-cume-dist/). - -Each computes the bucket allocation by accessing the data in table _"t4"_ through the view _"t4_view"_. - -First run [`cr_dp_views.sql`](./cr-dp-views/). This creates the view _"t4_view"_ to select _"t4.dp_score"_ as _"score"_ and the view _"results"_ over the table _"dp_results"_. - -Each of the window functions under test will now do this: -- Compute the bucket allocation for each _"t4.dp_score"_ value. -- Group these by the bucket number and compute the count, the minimum score, and the maximum score for each group. -- Insert this summary information into the table _"dp_results"_, annotating each row with the name of the window function that was used. - -Do this using [`do_populate_results.sql`](./do-populate-results/). - -Now report the results using [`do_report_results.sql`](./do-report-results/). - -You can see typical results here: [Output from running `do_ntile()`, `do_percent_rank()`, and `do_cume_dist()` on _"t4.dp_score"_](./reports/dp-results/). - -You can see at a glance that the bucket allocations produced by each method seem to be the same. It's certainly clear that the population of each bucket is, as requested, `100,000/20 = 5,000` for each bucket and for each method. However, it's too hard to check that the minimum and maximum score values for each of the _60_ buckets are the same. Use the script [`do_compare_dp_results.sql`](./do-compare-dp-results/) to check this. - -You can see the results here: [Output from running `do_compare_dp_results.sql` on _"dp_results"_](./reports/compare-dp-results/). These results are reproducible, no matter what the pseudorandomly generated values in table _"t4"_ are. - -The property of the three functions under test that this study aimed to investigate has been seen to hold. You can illustrate this by repeating the whole test after regenerating the source data using the script that [table t4](../function-syntax-semantics/data-sets/table-t4/) presents. When you do this, you can change the number of rows that are generated. And you can change the number of buckets that you request by editing [`do_populate_results.sql`](./do-populate-results/). Just be sure that the number of rows in table _"t4"_ is an exact multiple of the number of buckets that you request. - -Then simply run the master script, [`do_demo.sql`](./do-demo/), and inspect the spooled output. - -Of course, this isn't a _proof_ that the test will always have this outcome. But it is sufficient to show that the three functions do, at least, produce very similar results for this particular, and rather special, use case. - -### Step FOUR - -Finally, repeat the test when one of the special conditions of the particular use case doesn't hold—when the values that the three functions operate on have duplicates: - -- Compare the bucket allocation produced by [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile), [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank), and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) acting on the `int` column _"int_score"_. - -Simply run [`cr_int_views.sql`](./cr-int-views/) to create the view _"t4_view"_ to select _"t4.int_score"_ as _"score"_ and the view _"results"_ over the table _"int_results"_.
Now re-run the remaining steps as above: - -- [`do_populate_results.sql`](./do-populate-results/) -- [`do_report_results.sql`](./do-report-results/) - -You can see typical results here: [Output from running `do_ntile()`, `do_percent_rank()`, and `do_cume_dist()` on _"t4.int_score"_](./reports/int-results/). - -It's immediately obvious that the three functions under test produce dramatically different results on this, more general, kind of input. - -You can illustrate that the three functions under test produce different output when the special conditions do not hold by, for example, repeating the whole test after editing [`do_populate_results.sql`](./do-populate-results/) to set the number of buckets that you request so that the number of rows in table _"t4"_ is _not_ an exact multiple of the number of buckets. - -Then simply run the master script, [`do_demo.sql`](./do-demo/), and inspect the spooled output. Notice that now you see different results from the three functions even when they operate on _"t4.dp_score"_—which has only unique values. - -## Conclusion - -This study has shown that if, and only if, these special conditions hold: - -- a row set with _N_ rows has only unique values of the column list that the OVER clause specifies for ordering the rows in the [_window_](../invocation-syntax-semantics/#the-window-definition-rule)—in other words, there are no ties -- the aim is to allocate the rows into _n_ buckets that each have the same number of rows -- _N_ is an integral multiple of _n_ - -then the three functions under test, [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile), [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank), and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist), all produce the same bucket allocation. - -It showed, too, that if not all of these conditions hold, then each of the three functions produces a different bucket allocation. - -The moral of the tale is clear: - -- If your aim is to allocate rows into, as close as possible, equiheight buckets, then you should, of course, use the window function that is specified to meet this goal: [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile). -- If you have more subtle requirements that cannot be met by [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile), then you are probably well-versed in statistics and will understand in advance the different goals that [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) and [`cume_dist()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#cume-dist) are specified to meet and will simply know, _a priori_, which one of these two to choose. - -The study has also brought, as promised, several generic pedagogic benefits. And it did this precisely because it set out to meet a goal that was stated functionally in high-level terms. In other words, it did _not_ set out to meet any specific pedagogic goals. Rather, the techniques that were used emerged as a consequence of meeting the high-level goal. - -Here's a list of some of these generic pedagogic benefits. - -### Use of the normal_rand() function - -This function is useful in all sorts of generic testing scenarios. And it's often much better to use this than to use the ordinary `generate_series()` built-in.
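Here is a minimal sketch of its use. It is not part of the case study's code kit, and it assumes that the `tablefunc` extension has already been installed, as described for [table t4](../function-syntax-semantics/data-sets/table-t4/).

```plpgsql
-- Generate ten pseudorandomly chosen values from a normal distribution
-- with mean 100.0 and standard deviation 15.0.
select round(r::numeric, 2) as value
from normal_rand(10, 100.0, 15.0) as r;
```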
- -### Using gen_random_uuid() to populate a surrogate primary key column - -You saw that this idiom: -``` -k uuid default gen_random_uuid() primary key -``` -allows bulk insert into a table to go very much faster than this more familiar idiom: -``` -k serial primary key -``` -The reason for the speed difference is that `gen_random_uuid()` reliably produces unique values entirely programmatically while `serial` implies the use of a sequence—and such use brings the expense of inter-node coordinating round trips, in a multi-node distributed SQL database deployment, to guarantee that the values that are delivered are unique. - -### The use of a view, for both SELECT and INSERT, to point to different tables - -Notice these two scripts: [`cr_dp_views.sql`](./cr-dp-views/) and [`cr_int_views.sql`](./cr-int-views/). They allow: - -- _both_—the code that runs the window functions to be defined just once and to be switched to read from either the column with duplicate values or the column with unique values; -- _and_—the code that inserts into the results tables to be defined just once and to be switched correspondingly to write the results to the table that's appropriate for the present test. - -This technique is often used in test scenarios where the same test is run under two, or more, conditions. The technique is _not recommended_ for production use because of the various well-known problems brought by doing DDL in such an environment. - -### Using the WITH clause - -The `WITH` clause brings the obvious benefit that you can name intermediate result sets so that your SQL becomes very much more readable, and therefore very much more likely to implement what you intend, than without such naming. - -In particular, it is a far more self-evident way to implement self-joins than was possible, ages ago, before the advent of this device. (This technique was used in the script [`do_compare_dp_results.sql`](./do-compare-dp-results/).) - -### Using the ordinary GROUP BY clause in conjunction with window functions - -Very often, a window function is used in a `WITH` clause to specify intermediate results that then are reduced in volume in subsequent `WITH` clauses to meet the ultimate purpose of the SQL statement. - -### The use of "language plpgsql" procedures - -See, for example, the script that [table t4](../function-syntax-semantics/data-sets/table-t4/) presents. The procedure encapsulates several steps, each of which builds on the one before, in a way that you cannot manage to do easily (or at all) using a single SQL statement. In particular, this procedure populates an array with SQL, processes its content using SQL on the array rather than on a schema-level table, and then uses SQL again to insert the array's content directly into the target table. In other words, it uses SQL, in the implementation of procedural code, to bring the famous benefits of set-based processing, rather than iterative programming in a procedural loop. This is a very powerful technique. - -### The use of "language sql" functions and procedures as an alternative to PREPARE - -This brings several benefits over `PREPARE`: you can use named formal parameters to improve readability; you shift the responsibility from application code start-up time, for every new session, to a one-time database installation when you install all of the artifacts that implement that application's database back end; and you have a single point of maintenance for the SQL that the stored function or procedure encapsulates.
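Here is a minimal sketch of the contrast. The name _"top_scores"_ is invented for this illustration and is not part of the case study's code kit; the sketch assumes that the view _"t4_view"_ (created by [`cr_dp_views.sql`](./cr-dp-views/)) is in place and presents a `double precision` _"score"_ column.

```plpgsql
-- The PREPARE approach: must be repeated at the start of every session,
-- and the parameter can be referred to only positionally, as $1.
prepare top_scores_stmt(int) as
  select score from t4_view order by score desc limit $1;

execute top_scores_stmt(5);

-- The "language sql" function approach: installed once, with a named
-- formal parameter that the body references directly.
create or replace function top_scores(how_many in int)
  returns setof double precision
  language sql
as $body$
  select score from t4_view order by score desc limit how_many;
$body$;

select * from top_scores(5);
```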
- -### The contrast between ntile() and width_bucket() - -You saw that `ntile()` produces an equiheight histogram while `width_bucket()` produces an equiwidth histogram. The former is a window function; but the latter is a regular function. This reflects the fact that the latter is rather older in the history of databases than the former: it requires you to calculate the lower and upper bounds of the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) yourself, and supply these as actual arguments. You might use the window functions [`first_value`](../function-syntax-semantics/first-value-nth-value-last-value/#first-value) and [`last_value`](../function-syntax-semantics/first-value-nth-value-last-value/#last-value) for this. And you might decide to implement `my_width_bucket` as a window function. YSQL has an extensibility framework (beyond the scope of this section) that would allow this. - -It is to be hoped, therefore, that you might dip into this optional section periodically to seek inspiration for techniques that you might use when you have a new problem to solve. diff --git a/docs/content/preview/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/_index.md b/docs/content/preview/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/_index.md deleted file mode 100644 index 5f239345e727..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/analyzing-a-normal-distribution/reports/_index.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -title: Reports -linkTitle: Reports -headerTitle: Reports -description: Part of the code kit for the "Analyzing a normal distribution" section within the YSQL window functions documentation (reports of results). -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: normal-distribution-analysis-reports - parent: analyzing-a-normal-distribution - weight: 200 -type: indexpage ---- -**This page links to the following reports**
-       
-       [Output from running `histogram()` on _"t4.dp_score"_](./histogram-report/)
-       [Output from running `do_ntile()`, `do_percent_rank()`, and `do_cume_dist()` on _"t4.dp_score"_](./dp-results/)
-       [Output from running `do_compare_dp_results.sql` on _"dp_results_"](./compare-dp-results/)
-       [Output from running `do_ntile()`, `do_percent_rank()`, and `do_cume_dist()` on _"t4.int_score"_](./int-results/)
diff --git a/docs/content/preview/api/ysql/exprs/window_functions/function-syntax-semantics/_index.md b/docs/content/preview/api/ysql/exprs/window_functions/function-syntax-semantics/_index.md deleted file mode 100644 index 4153e06e715f..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/function-syntax-semantics/_index.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -title: YSQL window functions signature and purpose -linkTitle: Per function signature and purpose -headerTitle: Signature and purpose of each window function -description: This section summarizes the signature and purpose of each of the YSQL window functions and links to their individual accounts. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: window-function-syntax-semantics - parent: window-functions - weight: 30 -aliases: - - /preview/api/ysql/exprs/window_functions -type: indexpage -showRightNav: true ---- - -The two tables at the end classify the eleven built-in window functions into two groups according to their general common characteristics. - -**Note:** The navigation bar lists these window functions in four functional groups. The members in each group bear a strong family resemblance to each other. The first two groups list functions from the [first table](./#window-functions-that-return-an-int-or-double-precision-value-as-a-classifier-of-the-rank-of-the-row-within-its-window) below. And the second two groups list functions from the [second table](./#window-functions-that-return-column-s-of-another-row-within-the-window). - -### Aggregate function variants - -A few of these also have an aggregate function variant. This can be seen with the `\df` meta-command. For example, `\df rank` shows this: - -``` - Result data type | Argument data types | Type -------------------+----------------------------------------+-------- - bigint | | window - bigint | VARIADIC "any" ORDER BY VARIADIC "any" | agg -``` -This property is marked by _"Y"_ in the column _"agg?"_ in the following tables; a blank in this column means that the entry has only a window function variant. - -{{< note title="Functions with both a 'window' and an 'aggregate' variant" >}} - -The definitive account of the use, as an aggregate function, of a window function that has such a variant, is given within the [Aggregate functions](../../aggregate_functions/) major section, in the section [Within-group hypothetical-set aggregate functions](../../aggregate_functions/function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/). - -{{< /note >}} - -### Frame_clause sensitivity - -The results of all of the window functions depend upon what the window `ORDER BY` clause and the optional `PARTITION BY` clause say. Though you don't get an error if you omit the window `ORDER BY` clause, its omission brings unpredictable and therefore meaningless results. - -The results of a few of the window functions _are_ sensitive to what the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) says. This is marked by _"Y"_ in the column _"frame?"_ in the following tables; a blank in this column means that the entry is _not_ sensitive to what the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) says. - -#### Frame_clause-insensitive window functions - -All of the window functions listed in the first table, and `lag()` and `lead()` from the second table, are insensitive to whatever the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) might say.
You can easily show this by trying a few variants like, for example, this: - -``` --- Counter example. Don't use this for window functions --- that aren't frame_clause-sensitive. -range between 1 preceding and 1 following exclude current row -``` - -It says "consider only the row immediately before and the row immediately after the current row". You'll see that including this, or any other variant, in the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) brings the identical result to what including only the window `ORDER BY` clause brings for each of the window functions that aren't sensitive to the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause). - -Yugabyte recommends that you never include a [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) in the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) that you use when you invoke a window function that isn't sensitive to the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause). - -#### Frame_clause-sensitive window functions - -The names of the other window functions that the second table lists, `first_value()`, `nth_value()` and `last_value()`, tell you that the output of each makes obvious sense when the scope within which the specified row is found is the entire [_window_](../invocation-syntax-semantics/#the-window-definition-rule). The results of these three functions certainly _are_ sensitive to the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause). This is the default for the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause): - -``` - -- This specification is used if you omit the frame_clause. - -- You probably don't want the results that this specifies. - between unbounded preceding and current row -``` - -See the section [Window function invocation—SQL syntax and semantics](../invocation-syntax-semantics). But the default does _not_ specify the entire [_window_](../invocation-syntax-semantics/#the-window-definition-rule). To do this, use this variant: - -```plpgsql --- You must specify this explicitly unless you are sure --- that you want a different specification. -range between unbounded preceding and unbounded following -``` - -Yugabyte recommends, therefore, that you include [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) in the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) that you use when you invoke a window function that is sensitive to the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause), unless you have one of the very rare use cases where the output that you want is produced by a different [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause). - -**Note:** The [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause)'s many variants are useful when an aggregate function is invoked using the `OVER` clause. The section [Using the aggregate function `avg()` to compute a moving average](./#using-the-aggregate-function-avg-to-compute-a-moving-average) shows an example. - -### Window functions that return an "int" or "double precision" value as a "classifier" of the rank of the row within its window - -The only information, in the input row set, that the functions in this group depend upon is the emergent order as determined by the window `ORDER BY` clause. 
The actual order depends in the usual way on what this clause says. With the one exception of `ntile()`, these functions have no formal parameters. The `ntile()` function has a single, mandatory `int` formal parameter. It specifies the number of subsets into which the input row set should be classified. This means that it reflects only the invoker's intention and does not reflect the shape of the input. - -| Function | agg? | frame? | Description | -| ---- | ---- | ---- | ---- | -| [`row_number()`](./row-number-rank-dense-rank/#row-number) | | | Returns a unique integer for each row in a [_window_](../invocation-syntax-semantics/#the-window-definition-rule), from a dense series that starts with _1_, according to the emergent order that the window `ORDER BY` clause specifies. For the two or more rows in a tie group, the unique values are assigned randomly. | -| [`rank()`](./row-number-rank-dense-rank/#rank) | Y | | Returns the integer ordinal rank of each row according to the emergent order that the window `ORDER BY` clause specifies. The series of values starts with 1 but, when the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) contains ties, the series is not dense. The "ordinal rank" notion is familiar from sporting events. If three runners reach the finish line at the same time, then they are all deemed to have tied for first place. The runner who finishes next after these is deemed to have come in fourth place because three runners came in before this finisher. | -| [`dense_rank()`](./row-number-rank-dense-rank/#dense-rank) | Y | | Returns the integer ordinal rank of the distinct value of each row according to what the window `ORDER BY` clause specifies. The series of values starts with _1_ and, even when the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) contains ties, the series is dense. The "dense rank" notion reflects the ordering of distinct values of the list of expressions that the window `ORDER BY` clause specifies. In the running race example, the three runners who tied for first place would get a dense rank of 1. And the runner who finished next after these would get a dense rank of 2, because this finisher got the second fastest distinct finish time. | -| [`percent_rank()`](./percent-rank-cume-dist-ntile/#percent-rank) | Y | | Returns the percentile rank of each row within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule), with respect to the argument of the [`window_definition`](./#the-window-definition-rule)'s window `ORDER BY` clause, as a number in the range _0.0_ through _1.0_. The lowest value of `percent_rank()` within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) will always be _0.0_, even when there is a tie between the lowest-ranking rows. The highest possible value of `percent_rank()` within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) is _1.0_, but this value is seen only when the highest-ranking row has no ties. If the highest-ranking row does have ties, then the highest value of `percent_rank()` within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) will be correspondingly less than _1.0_ according to how many rows tie for the top rank. The notion is well-known from statistics. More details are given in this function's dedicated account. 
| -| [`cume_dist()`](./percent-rank-cume-dist-ntile/#cume-dist) | Y | | Returns a value that represents the number of rows with values less than or equal to the current row's value divided by the total number of rows—in other words, the relative position of a value in a set of values. The graph of all values of `cume_dist()` within the [_window_](../invocation-syntax-semantics/#the-window-definition-rule) is known as the cumulative distribution of the argument of the [`window_definition`](./#the-window-definition-rule)'s window `ORDER BY` clause. The value _c_ returned by `cume_dist()` is a number in the range _0 < c <= 1_. You can use `cume_dist()` to answer questions like this: Show me the rows whose score is within the top "x%" of the [_window_](../invocation-syntax-semantics/#the-window-definition-rule)'s population, ranked by score. The notion is well-known from statistics. More details are given in this function's dedicated account. | -| [`ntile()`](./percent-rank-cume-dist-ntile/#ntile) | | | Returns an integer value for each row that maps it to a corresponding percentile. For example, if you wanted to mark the boundaries between the highest-ranking 20% of rows, the next-ranking 20% of rows, and so on, then you would use `ntile(5)`. The top 20% of rows would be marked with _1_, the next-to-top 20% of rows would be marked with _2_, and so on, so that the bottom 20% of rows would be marked with _5_. If the number of rows in the [_window_](../invocation-syntax-semantics/#the-window-definition-rule), _N_, is a multiple of the actual value with which you invoke `ntile()`, `n`, then each percentile set would have exactly _N/n_ rows. This is achieved, if there are ties right at the boundary between two percentile sets, by randomly assigning some to one set and some to the other. If _N_ is not a multiple of _n_, then `ntile()` assigns the rows to the percentile sets so that the numbers assigned to each are as close as possible to being the same. | - -### Window functions that return columns of another row within the window - -The functions in this group depend, in the same way as those in the first group do, upon the emergent order as determined by the window `ORDER BY` clause. Each has, at least, a single, mandatory, `anyelement` formal parameter that specifies which value to fetch from the designated other row in the [_window_](../invocation-syntax-semantics/#the-window-definition-rule). If you want to fetch values from more than one column, you must combine them into a scalar value. The most obvious way to do this is to list the columns in the constructor for a user-defined _"row"_ type. The `nth_value()` function has a mandatory second `int` formal parameter that specifies the value of _"N"_ to give _"Nth"_ its meaning. The other functions in this group have just one formal parameter. - -| Function | agg? | frame? | Description | -| ---- | ---- | ---- | ---- | -| [`first_value()`](./first-value-nth-value-last-value/#first-value) | | Y | Returns the specified value from the first row, in the specified sort order, in the current [_window frame_](../invocation-syntax-semantics/#frame-clause-semantics-for-window-functions). If you specify the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) to start at a fixed offset before the current row, then `first_value()` would produce the same result as would the correspondingly parameterized `lag()`. If this is your aim, then you should use `lag()` for clarity. 
| -| [`nth_value()`](./first-value-nth-value-last-value/#nth-value) | | Y | Returns the specified value from the _"Nth"_ row, in the specified sort order, in the current [_window frame_](../invocation-syntax-semantics/#frame-clause-semantics-for-window-functions). The second, mandatory, parameter specifies _"N"_ in _"Nth"_. | -| [`last_value()`](./first-value-nth-value-last-value/#last-value) | | Y | Returns the specified value from the last row, in the specified sort order, in the current [_window frame_](../invocation-syntax-semantics/#frame-clause-semantics-for-window-functions). | -| [`lag()`](./lag-lead/#lag) | | | Returns, for the current row, the designated value from the row in the ordered input set that is _"lag_distance"_ rows before it. The data type of the _return value_ matches that of the _input value_. `NULL` is returned when the value of _"lag_distance"_ places the earlier row before the start of the [_window_](../invocation-syntax-semantics/#the-window-definition-rule). Use the optional last parameter to specify the value to be returned, instead of `NULL`, when the looked-up row falls outside of the current [_window_](../invocation-syntax-semantics/#the-window-definition-rule). | -| [`lead()`](./lag-lead/#lead) | | | Returns, for the current row, the designated value from the row in the ordered input set that is _"lead_distance"_ rows after it. The data type of the _return value_ matches that of the _input value_. `NULL` is returned when the value of _"lead_distance"_ places the later row after the end of the [_window_](../invocation-syntax-semantics/#the-window-definition-rule). Use the optional last parameter to specify the value to be returned, instead of `NULL`, when the looked-up row falls outside of the current [_window_](../invocation-syntax-semantics/#the-window-definition-rule). | diff --git a/docs/content/preview/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/_index.md b/docs/content/preview/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/_index.md deleted file mode 100644 index cdb48e0d4750..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/function-syntax-semantics/data-sets/_index.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: Example data sets for window functions -linkTitle: Tables for the code examples -headerTitle: The data sets used by the code examples -description: This section describes, and presents the code to create, a selection of four data sets for the code examples that illustrate the use of window functions. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: data-sets - parent: window-function-syntax-semantics - weight: 50 -type: indexpage ---- - -These four pages: - -- [table t1](./table-t1/) -- [table t2](./table-t2/) -- [table t3](./table-t3/) -- [table t4](./table-t4/) - -contain scripts to create and populate tables with data sets that are useful for demonstrating window function semantics. - -Each table uses a surrogate `uuid` primary key whose values are provided by the function `gen_random_uuid()`, brought by the `pgcrypto` extension. The procedure to populate table _"t4"_ also uses the function `normal_rand()`, brought by the `tablefunc` extension. These extensions are described in the sections [pgcrypto](../../../../../../explore/ysql-language-features/pg-extensions/extension-pgcrypto) and [tablefunc](../../../../../../explore/ysql-language-features/pg-extensions/extension-tablefunc). Each is a pre-bundled extension.
This means that the installation for each will work without any preparatory steps, as long as you install them as a `superuser` like this: - -```plpgsql -create extension pgcrypto; -create extension tablefunc; -``` - -If you plan to run the code samples in this main _"Window functions"_ section on your laptop using a YugabyteDB cluster that you've created for your own personal use, then you probably have already adopted the habit of running any and all _ad hoc_ tests as a `superuser`. If so, then simply install the [pgcrypto](../../../../../../explore/ysql-language-features/pg-extensions/extension-pgcrypto) and [tablefunc](../../../../../../explore/ysql-language-features/pg-extensions/extension-tablefunc) extensions just as you'd do anything else and then create the tables _"t1"_, _"t2"_, _"t3"_, and _"t4"_. - -{{< note title="Note 1: about the installation of extensions" >}} - -If you've established the practice of creating different databases within your cluster for different purposes, and within a database using a regime of different users that own different schemas to model how a real-world application would be organized, then you'll doubtless want to create and install the extensions in a dedicated central schema and ensure that this is in the _search path_ for all ordinary users. - -{{< /note >}} - -{{< note title="Note 2: about the use of the gen_random_uuid() function" >}} - -Yugabyte recommends that, when you want a self-populating surrogate primary key column, you should use the approach shown here for the test tables, like this: - -```sql -create table my_table( - k uuid default gen_random_uuid() primary key, ... -``` - -This is preferred to using, for example, `serial` or `bigserial`, like this: - -```sql -create table t4( - k serial primary key, ... -``` - -(This latter approach is common, and works well, in PostgreSQL—a monolithic SQL database.) This is because `serial` and `bigserial` use a `SEQUENCE` to generate unique values, and this involves expensive coordination between the nodes in a YugabyteDB cluster. In contrast, any invocation of `gen_random_uuid()` on any node will reliably produce a new globally unique value entirely algorithmically. This brings a noticeable performance benefit. - -The tables _"t1"_, _"t2"_, and _"t3"_ have only a handful of rows and so this performance benefit is well below the noise level. But [table _"t4"_](./table-t4/) is populated using a purpose-written procedure parameterized with the number of rows to create. You get the most convincing demonstration effect with a large number of rows, like _100,000_. - -You can expect to see that populating table _"t4"_ using `gen_random_uuid()` is about _20x_ faster than using a sequence. - -{{< /note >}} - -Each of the tables _"t1"_, _"t2"_, _"t3"_, and _"t4"_ is populated so that the values of interest for the demonstrations come back in random order (as you are taught to expect) when a query has no `ORDER BY` clause. This takes just a little programming effort for the tables _"t1"_, _"t2"_, and _"t3"_. Effort is needed because, following a bulk insert into a newly-created table, queries with no `ORDER BY` clause tend to see the rows come back in the order in which they are inserted. And the `INSERT` statements for the tables _"t1"_, _"t2"_, and _"t3"_ explicitly list the to-be-inserted values in an intuitive order where they increase monotonically. 
No effort is needed for table _"t4"_ because the values of interest are generated by `normal_rand()`—which generates its values in a random order. - -Deliberately subverting this tendency (that when rows are inserted naïvely, as they usually are for functionality demonstrations, they come back in a "natural" order, even with no `ORDER BY`) allows a vivid demonstration of the fact that if the [`window_definition`](../../../../syntax_resources/grammar_diagrams/#window-definition) that's used to invoke _any_ window function has no window `ORDER BY` clause (even if there is such a clause at overall query level), then the results are unpredictable and therefore meaningless. This is demonstrated in the section [Showing the importance of the window ORDER BY clause](../../functionality-overview/#showing-the-importance-of-the-window-order-by-clause). (There are cases where the order doesn't matter—for example, when the set of rows is the input to a conventionally invoked aggregate function.) - -{{< note title="Save a script to (re)create all four test tables." >}} - -It's a good idea to save a script that you can use quickly and effortlessly to create the test tables should you lose, or change, them. This can happen easily with a YugabyteDB cluster on your laptop that you use for all sorts of _ad hoc_ tests. For example, it takes only moments to destroy and re-create the cluster (or even to upgrade to a new version of YugabyteDB)—and this is common practice for certain kinds of test. - -Of course, you must first visit each of these pages: - -- [table t1](./table-t1/) -- [table t2](./table-t2/) -- [table t3](./table-t3/) -- [table t4](./table-t4/) - -and save each of the scripts that these present in the same directory where you save the "Master" installation script. - -Save this script as, for example, `install_all_tables.sql`: - -```plpgsql --- You can run this script time and again. It will always finish silently. - -\i t1.sql -\echo 't1 done' - -\i t2.sql -\echo 't2 done' - -\i t3.sql -\echo 't3 done' - -\i t4_1.sql -\i t4_2.sql -\echo 't4 done' -``` - -Then you can simply do this whenever you need to re-establish the state that the code examples rely on: - -```plpgsql -\i install_all_tables.sql -``` - -It takes only a few seconds to finish. Each of the scripts `t1.sql`, `t2.sql`, `t3.sql`, `t4_1.sql`, and `t4_2.sql` that it runs is designed to be able to be repeated. Each finishes silently on its first and all subsequent runs. - -{{< /note >}} diff --git a/docs/content/preview/api/ysql/exprs/window_functions/functionality-overview.md b/docs/content/preview/api/ysql/exprs/window_functions/functionality-overview.md deleted file mode 100644 index 8a0c1cc5366b..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/functionality-overview.md +++ /dev/null @@ -1,406 +0,0 @@ ---- -title: Window function invocation using the OVER clause -linkTitle: Informal functionality overview -headerTitle: Informal overview of window function invocation using the OVER clause -description: This section provides an informal introduction to the invocation of window functions and aggregate functions using the OVER clause. 
-menu: - preview_api: - identifier: window-functions-functionality-overview - parent: window-functions - weight: 10 -type: docs ---- - -A good sense of the general functionality of window functions is given by examples that use [`row_number()`](../function-syntax-semantics/row-number-rank-dense-rank/#row-number), [`nth_value()`](../function-syntax-semantics/first-value-nth-value-last-value/#nth-value), [`last_value()`](../function-syntax-semantics/first-value-nth-value-last-value/#last-value), [`lag()`](../function-syntax-semantics/lag-lead/#lag), and [`lead()`](../function-syntax-semantics/lag-lead/#lead). - -Aggregate functions can be invoked with the `OVER` clause. Examples are given using `avg()` and `sum()`. - -These examples are sufficient to give a general sense of the following notions: - -- how window functions are invoked, and their general semantics -- the three clauses of the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) : the `PARTITION BY` clause, the window `ORDER BY` clause, and the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) -- how an aggregate function gains different useful functionality when it's invoked using an `OVER` clause rather than (as is probably more common) in conjunction with the regular `GROUP BY` clause. - -{{< note title=" " >}} - -If you haven't yet installed the tables that the code examples use, then go to the section [The data sets used by the code examples](../function-syntax-semantics/data-sets/). - -{{< /note >}} - -## Using row_number() in the simplest way - -The [`row_number()`](../function-syntax-semantics/row-number-rank-dense-rank/#row-number) window function is the simplest among the set of eleven such functions that YSQL supports. Briefly, this function assigns an ordinal number, starting at _1_, to the rows within the specified [_window_](../invocation-syntax-semantics/#the-window-definition-rule) according to the specified ordering rule. Here is the most basic example. - -```plpgsql -select - k, - row_number() over(order by k desc) as r -from t1 -order by k asc; -``` - -The syntax and semantics of the `ORDER BY` clause, within the parentheses of the `OVER` clause, are identical to what you're used to when an `ORDER BY` clause is used after the `FROM` clause in a subquery. The `DESC` keyword is used in this example to emphasize this point. It says that the values returned by [`row_number()`](../function-syntax-semantics/row-number-rank-dense-rank/#row-number) are to be assigned in the order corresponding to sorting the values of _"k"_ in descending order—and it specifies nothing else. Here is the result: - -``` - k | r -----+---- - 1 | 25 - 2 | 24 - 3 | 23 - 4 | 22 - 5 | 21 - ... - 21 | 5 - 22 | 4 - 23 | 3 - 24 | 2 - 25 | 1 -``` - -The output lines for values of _"r"_ between _6_ and _20_ were manually removed to reduce the clutter. - -Because the `OVER` clause doesn't specify a `PARTITION BY` clause, the so-called [_window_](../invocation-syntax-semantics/#the-window-definition-rule) that [`row_number()`](../function-syntax-semantics/row-number-rank-dense-rank/#row-number) operates on coincides with all of the rows in table _"t1"_. 
- -The next example emphasizes the point that a window function is often used in a subquery which, like any other subquery, is used to define a `WITH` clause view to allow further logic to be applied—in this case, a `WHERE` clause restriction on the values returned by [`row_number()`](../function-syntax-semantics/row-number-rank-dense-rank/#row-number) (and, of course, a final query-level `ORDER BY` rule). - -```plpgsql -with v as ( - select - k, - row_number() over(order by k desc) as r - from t1) -select - k, - r -from v -where (r between 1 and 5) or (r between 21 and 25) -order by r asc; -``` - -This is the result: - -``` - k | r -----+---- - 25 | 1 - 24 | 2 - 23 | 3 - 22 | 4 - 21 | 5 - 5 | 21 - 4 | 22 - 3 | 23 - 2 | 24 - 1 | 25 -``` - -## Showing the importance of the window ORDER BY clause - -Here is a counter example. Notice that the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) doesn't specify a window `ORDER BY` clause. - -```plpgsql -with a as ( - select - -- The use of the bare OVER() here brings meaningless results. - row_number() over () as r, - class, - k - from t1) -select - r, - class, - k, - case k=r - when true then 'true' - else '' - end as chk -from a -order by r; -``` - -To see the most dramatic effect of the unpredictability of the result set, save the code from [table t1](../function-syntax-semantics/data-sets/table-t1/) into a file called, say, _"unpredictable.sql"_. Then copy the SQL statement, above, to the end of this file and invoke it time and again in `ysqlsh`. Here is a typical result: - -``` - r | class | k | chk -----+-------+----+------ - 1 | 5 | 23 | - 2 | 5 | 25 | - 3 | 2 | 9 | - 4 | 1 | 4 | true - 5 | 3 | 11 | - 6 | 1 | 1 | - 7 | 3 | 13 | - 8 | 4 | 16 | - 9 | 1 | 2 | - 10 | 2 | 7 | - 11 | 1 | 3 | - 12 | 4 | 18 | - 13 | 3 | 15 | - 14 | 5 | 21 | - 15 | 3 | 14 | - 16 | 3 | 12 | - 17 | 4 | 17 | true - 18 | 1 | 5 | - 19 | 2 | 10 | - 20 | 4 | 20 | true - 21 | 5 | 24 | - 22 | 5 | 22 | true - 23 | 2 | 6 | - 24 | 2 | 8 | - 25 | 4 | 19 | -``` - -Sometimes, you'll see that, by chance, not a single output row is marked _"true"_. Sometimes, you'll see that a few are so marked. - -## Using row_number() with "PARTITION BY" - -This example adds a `PARTITION BY` clause to the window `ORDER BY` clause in the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) . It selects and orders by _"v"_ rather than _"k"_ because _"v"_ has `NULL`s and demonstrates the within-[_window_](../invocation-syntax-semantics/#the-window-definition-rule) effect of `NULLS FIRST`. The [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) is moved to a dedicated `WINDOW` clause that names it so that the `OVER` clause can simply reference the definition that it needs. This might seem only to add verbosity in this example. But using a dedicated `WINDOW` clause reduces verbosity when invocations of several different window functions in the same subquery use the same [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition). - -```plpgsql -\pset null '??' - -with a as ( - select - class, - v, - row_number() over w as r - from t1 - window w as (partition by class order by v desc nulls first)) -select - class, - v, - r -from a -where class in (2, 4) -order by class, r; -``` - -This is the result: - -``` - class | v | r --------+----+--- - 2 | ?? | 1 - 2 | 9 | 2 - 2 | 8 | 3 - 2 | 7 | 4 - 2 | 6 | 5 - 4 | ?? 
| 1 - 4 | 19 | 2 - 4 | 18 | 3 - 4 | 17 | 4 - 4 | 16 | 5 -``` - -## Using nth_value() and last_value() to return the whole row - -If you want the output value for any of `first_value()`, `last_value()`, `nth_value()`, `lag()`, or `lead()` to include more than one column, then you must list the columns in a _"row"_ type constructor. This example uses [`nth_value()`](../function-syntax-semantics/first-value-nth-value-last-value/#nth-value). This accesses the _Nth_ row within the ordered set that each [_window_](../invocation-syntax-semantics/#the-window-definition-rule) defines. It picks out the third row. The restriction _"class in (3, 5)"_ cuts down the result set to make it easier to read. - -```plpgsql -drop type if exists rt cascade; -create type rt as (class int, k int, v int); - -select - class, - nth_value((class, k, v)::rt, 3) over w as nv -from t1 -where class in (3, 5) -window w as ( - partition by class - order by k - range between unbounded preceding and unbounded following - ) -order by class; -``` - -It produces this result: - -``` - class | nv --------+----------- - 3 | (3,13,13) - 3 | (3,13,13) - 3 | (3,13,13) - 3 | (3,13,13) - 3 | (3,13,13) - 5 | (5,23,23) - 5 | (5,23,23) - 5 | (5,23,23) - 5 | (5,23,23) - 5 | (5,23,23) -``` - -Each of `first_value()`, `last_value()`, and `nth_value()`, as their names suggest, produces the same output for each row of a [_window_](../invocation-syntax-semantics/#the-window-definition-rule). It would be natural, therefore, to use the query above in a `WITH` clause whose final `SELECT` picks out the individual columns from the record and adds a `GROUP BY` clause, thus: - -```plpgsql -drop type if exists rt cascade; -create type rt as (class int, k int, v int); - -\pset null '??' -with a as ( - select - last_value((class, k, v)::rt) over w as lv - from t1 - window w as ( - partition by class - order by k - range between unbounded preceding and unbounded following)) -select - (lv).class, - (lv).k, - (lv).v -from a -group by class, k, v -order by class; -``` - -This example uses [`last_value()`](../function-syntax-semantics/first-value-nth-value-last-value/#last-value) because the data set has different values for _"k"_ and _"v"_ for the last row in each [_window_](../invocation-syntax-semantics/#the-window-definition-rule). This is the result: - -``` - class | k | v --------+----+---- - 1 | 5 | ?? - 2 | 10 | ?? - 3 | 15 | ?? - 4 | 20 | ?? - 5 | 25 | ?? -``` - -## Using lag() and lead() to compute a moving average - -The aim is to compute the moving average for each day within the window, where this is feasible, over the last-but-one day, the last day, the current day, the next day, and the next-but-one day. - -Notice that the following section uses the aggregate function `avg()` to produce the same result, and it shows the advantages of that approach over using the window functions [`lag()`](../function-syntax-semantics/lag-lead/#lag) and [`lead()`](../function-syntax-semantics/lag-lead/#lead). There are many other cases where `lag()` and/or `lead()` are needed and where `avg()` is of no use. The present use case was chosen here because it shows very clearly what `lag()` and `lead()` do and, especially, because it allows the demonstration of invoking an aggregate function with an `OVER` clause. - -The query is specifically written to meet the exact requirements. It would need to be manually re-written to base the moving average on a bigger, or smaller, range of days. 
Notice that the same [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) , _"w"_, is used as the argument for each of the four uses of the `OVER` clause. This is where using a separate `WINDOW` clause delivers its intended benefit. - -The statement of requirement implies that the computation is not feasible for the first two and the last two days in the window. Under these circumstances, `lag()` and `lead()` return `NULL`—or, if you prefer, a default value that you supply using an optional third parameter. See the dedicated section on [`lag()` and `lead()`](../function-syntax-semantics/lag-lead/) for details. - -```plpgsql -with v as ( - select - day, - lag (price::numeric, 2) over w as lag_2, - lag (price::numeric, 1) over w as lag_1, - price::numeric, - lead(price::numeric, 1) over w as lead_1, - lead(price::numeric, 2) over w as lead_2 - from t3 - window w as (order by day)) -select - to_char(day, 'Dy DD-Mon') as "Day", - ((lag_2 + lag_1 + price + lead_1 + lead_2)/5.0)::money as moving_avg -from v -where (lag_2 is not null) and (lead_2 is not null) -order by day; -``` - -This is the result: - -``` - Day | moving_avg -------------+------------ - Wed 17-Sep | $18.98 - Thu 18-Sep | $19.13 - Fri 19-Sep | $19.27 - Mon 22-Sep | $19.64 - Tue 23-Sep | $19.99 - Wed 24-Sep | $20.10 - Thu 25-Sep | $19.90 - Fri 26-Sep | $19.62 - Mon 29-Sep | $19.60 - Tue 30-Sep | $19.41 - Wed 01-Oct | $19.18 - Thu 02-Oct | $19.08 - Fri 03-Oct | $18.78 - Mon 06-Oct | $18.19 - Tue 07-Oct | $17.53 - Wed 08-Oct | $16.97 - Thu 09-Oct | $17.08 - Fri 10-Oct | $17.26 - Mon 13-Oct | $17.08 - Tue 14-Oct | $17.23 - Wed 15-Oct | $17.30 -``` - -## Using the aggregate function avg() to compute a moving average - -This solution takes advantage of this [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) to determine the rows that `avg()` uses: - -``` -order by day groups between $1 preceding and $1 following -``` - -Here, the statement is first prepared and then executed to emphasize the fact that a single formulation of the statement text works for any arbitrary range of days around the current row. The section [Window function invocation—SQL syntax and semantics](../invocation-syntax-semantics/) explains the full power of expression brought by the `OVER` clause. - -Notice that this approach uses the value returned by [`row_number()`](../function-syntax-semantics/row-number-rank-dense-rank/#row-number), using an `OVER` clause that does no more than order the rows, to exclude the meaningless first _N_ and last _N_ averages, where _N_ is the same parameterized value that _"groups between N preceding and N following"_ uses. These rows, if not excluded, would simply show averages computed over however many rows happen to be available within the frame. You probably don't want to see those answers. - -```plpgsql -prepare stmt(int) as -with v as ( - select - day, - avg(price::numeric) over w1 as a, - row_number() over w2 as r - from t3 - window - w1 as (order by day groups between $1 preceding and $1 following), - w2 as (order by day)) -select - to_char(day, 'Dy DD-Mon') as "Day", - a::money as moving_avg -from v -where r between ($1 + 1) and (select (count(*) - $1) from v) -order by day; - -execute stmt(2); -``` - -The result is identical to that produced by the `lag()`/`lead()` approach. Try repeating the `EXECUTE` statement with a few different actual arguments. The bigger the argument gets, the fewer result rows you see, and the closer the values of the moving average get to each other. 
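If you want to experiment along these lines, here is a minimal sketch. It assumes that the prepared statement _"stmt"_ from the example above still exists in your session; the argument value _5_ is illustrative only.

```plpgsql
-- Widen the moving average to five days on each side of the current row.
-- The row_number() filter now excludes the first five and the last five days.
execute stmt(5);

-- Release the prepared statement when you have finished experimenting.
deallocate stmt;
```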
- -## Using the aggregate function sum() with the OVER clause - -This example shows a different spelling of the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause): - -``` -range between unbounded preceding and current row -``` - -so that the sum includes, for each row, the row itself and only the rows that precede it in the sort order. - -```plpgsql -with v as ( - select - class, - k, - sum(k) over w as s - from t1 - window w as ( - partition by class - order by k - range between unbounded preceding and current row)) -select - class, - k, - s -from v -where class in (2, 4) -order by class, k; -``` - -This is the result: - -``` - class | k | s --------+----+---- - 2 | 6 | 6 - 2 | 7 | 13 - 2 | 8 | 21 - 2 | 9 | 30 - 2 | 10 | 40 - 4 | 16 | 16 - 4 | 17 | 33 - 4 | 18 | 51 - 4 | 19 | 70 - 4 | 20 | 90 -``` diff --git a/docs/content/preview/api/ysql/exprs/window_functions/invocation-syntax-semantics.md b/docs/content/preview/api/ysql/exprs/window_functions/invocation-syntax-semantics.md deleted file mode 100644 index 26bd1d64d123..000000000000 --- a/docs/content/preview/api/ysql/exprs/window_functions/invocation-syntax-semantics.md +++ /dev/null @@ -1,294 +0,0 @@ ---- -title: Window function syntax and semantics -linkTitle: Invocation syntax and semantics -headerTitle: Window function invocation—SQL syntax and semantics -description: This section specifies the syntax and semantics of the OVER clause and the WINDOW clause. You may also invoke aggregate functions this way. -menu: - preview_api: - identifier: window-functions-aggregate-functions-syntax-semantics - parent: window-functions - weight: 20 -type: docs ---- - -{{< note title="The rules described in this section also govern the invocation of aggregate functions." >}} - -The dedicated [Aggregate functions](../../aggregate_functions/) section explains that one kind of aggregate function—so-called ordinary aggregate functions, exemplified by [`avg()`](../../aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/#avg) and [`count()`](../../aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/#count)—can optionally be invoked using the identical syntax that you use to invoke window functions. That dedicated section has many examples. See also the sections [Using the aggregate function avg() to compute a moving average](../functionality-overview/#using-the-aggregate-function-avg-to-compute-a-moving-average) and [Using the aggregate function sum() with the OVER clause](../functionality-overview/#using-the-aggregate-function-sum-with-the-over-clause) in the present Window functions main section. -{{< /note >}} - -{{< note title="A note on orthography" >}} - -Notice these three different orthography styles: - -- `OVER` is a keyword that names a clause. You write such a keyword in a SQL statement. - -- [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) is the name of a rule within the overall SQL grammar. You never type such a name in a SQL statement. It is written in bold lower case with underscores, as appropriate, between the English words. Because such a rule is always shown as a link, you can jump directly to the rule in the [Grammar Diagrams](../../../syntax_resources/grammar_diagrams/#abort) page. This page shows every single one of the SQL rules. 
It so happens that the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) rule starts with the keyword `WINDOW` and might therefore, according to the context of use, be referred to alternatively as the `WINDOW` clause. - -- [_window frame_](./#frame-clause-semantics-for-window-functions) is a pure term of art. It is written in italic lower case with spaces, as appropriate, between the English words. You neither write it in a SQL statement nor use it to look up anything in the [Grammar Diagrams](../../../syntax_resources/grammar_diagrams/#abort) page. Because such a term of art is always shown as a link, you can jump directly to its definition within this _"Window function invocation—SQL syntax and semantics"_ page. - -{{< /note >}} - -## Syntax - -### Reproduced from the SELECT statement section - -The following three diagrams, [`select_start`](../../../syntax_resources/grammar_diagrams/#select-start), [`WINDOW` clause](../../../syntax_resources/grammar_diagrams/#window-clause), and [`fn_over_window`](../../../syntax_resources/grammar_diagrams/#fn-over-window) rule, are reproduced from the section that describes the [`SELECT` statement](../../../the-sql-language/statements/dml_select/). - -{{%ebnf localrefs="window_definition"%}} -select_start, -window_clause, -fn_over_window -{{%/ebnf%}} - -### Definition of the window_definition rule - -As promised in the `SELECT` statement section, this section explains the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) rule and its use as the argument of either the `OVER` keyword or the `WINDOW` keyword. - -A [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) can be used only at these two syntax spots, within the enclosing syntax of a subquery. - -{{%ebnf localrefs="frame_clause" %}} -window_definition -{{%/ ebnf %}} - -### The frame_clause - -{{% ebnf %}} -frame_clause, -frame_bounds, -frame_start, -frame_end, -frame_bound, -frame_exclusion -{{%/ ebnf %}} - -## Semantics - -### The fn_over_window rule - -A window function can be invoked only at the syntax spot in a subquery that the diagram for the [`select_start`](../../../syntax_resources/grammar_diagrams/#select-start) rule shows. An [aggregate function](../../aggregate_functions/) _may_ be invoked in this way as an alternative to its more familiar invocation as a regular `SELECT` list item in conjunction with the `GROUP BY` clause. (The invocation of an aggregate function in conjunction with the `GROUP BY` clause is governed by the `ordinary_aggregate_fn_invocation` rule or the `within_group_aggregate_fn_invocation` rule.) - -The number, data types, and meanings of a window function's formal parameters are function-specific. The eleven window functions are classified into functional groups, and summarized, in the two tables at the end of the section [Signature and purpose of each window function](../function-syntax-semantics/). Each entry links to the formal account of the function which also provides runnable code examples. - -Notice that, among the dedicated window functions (as opposed to aggregate functions that may be invoked as window functions), only [`ntile()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#ntile) takes an argument. Every other dedicated window function is invoked with an empty parentheses pair. 
Some aggregate functions (like, for example, [`jsonb_object_agg()`](../../aggregate_functions/function-syntax-semantics/array-string-jsonb-jsonb-object-agg/#jsonb-object-agg)) take more than one argument. When an aggregate function is invoked as a window function, the keyword `DISTINCT` is not allowed within the parenthesized list of arguments. The attempt causes this error: - -``` -0A000: DISTINCT is not implemented for window functions -``` - -### The window_definition rule - -The syntax diagram for the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) shows that it uses three complementary specifications: - -- The `PARTITION BY` clause defines the maximal subsets, of what the subquery-level `WHERE` clause defines, that are operated upon, in turn, by a window function (or by an aggregate function in window mode). Tautologically, this maximal subset is referred to as the [_window_](./#the-window-definition-rule). In the limit, when the `PARTITION BY` clause is omitted, the maximal subset is identical with what the `WHERE` clause defines. -- The window `ORDER BY` clause defines how the rows are to be ordered within the [_window_](./#the-window-definition-rule). -- The [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) defines a further narrowing of the [_window_](./#the-window-definition-rule), referred to as the [_window frame_](./#frame-clause-semantics-for-window-functions). The [_window frame_](./#frame-clause-semantics-for-window-functions) is anchored to the current row within the [_window_](./#the-window-definition-rule). In the degenerate case, the [_window frame_](./#frame-clause-semantics-for-window-functions) coincides with the [_window_](./#the-window-definition-rule) and is therefore insensitive to the position of the current row. - -In summary, the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) defines the [_window_](./#the-window-definition-rule) as the scope within which a function's meaning (window function or aggregate function in window mode) is defined. The [_window_](./#the-window-definition-rule) is then further characterized by the ordering of its rows, the extent of the [_window frame_](./#frame-clause-semantics-for-window-functions), and how this moves with the current row. - -### The FILTER clause - -The `FILTER` clause's `WHERE` clause has the same syntax and semantics as it does at the regular `WHERE` clause syntax spot immediately after a subquery's `FROM` list. Notice that the `FILTER` clause is legal only for the invocation of an aggregate function. Here is an example: - -```plpgsql -select - class, - k, - count(*) - filter(where k%2 = 0) - over (partition by class) - as n -from t1 -order by class, k; -``` - -If you want to run this, then create a data set using the `ysqlsh` script that [table t1](../function-syntax-semantics/data-sets/table-t1/) presents. - -Using the `FILTER` clause in the invocation of a window function causes this compilation error: - -``` -0A000: FILTER is not implemented for non-aggregate window functions -``` - -### The PARTITION BY clause - -The `PARTITION BY` clause groups the rows that the subquery defines into [_windows_](./#the-window-definition-rule), which are processed separately by the window function. (This holds, too, when an aggregate function is invoked in this way.) It works similarly to a query-level `GROUP BY` clause, except that its expressions are always just expressions and cannot be output-column names or numbers. 
If the `PARTITION BY` clause is omitted, then all rows are treated as a single [_window_](./#the-window-definition-rule). - -### The window ORDER BY clause - -The window `ORDER BY` clause determines the order in which the rows of a [_window_](./#the-window-definition-rule) are processed by the window function. It works similarly to a query-level `ORDER BY` clause; but it cannot use output-column names or numbers. If the window `ORDER BY` clause is omitted, then rows are processed in an unspecified order so that the results of any window function invoked in this way would be unpredictable and therefore meaningless. Aggregation functions invoked in this way might be sensitive to what the window `ORDER BY` clause says. This will be the case when, for example, the [_window frame_](./#frame-clause-semantics-for-window-functions) is smaller than the whole [_window_](./#the-window-definition-rule) and moves with the current row. The section [Using the aggregate function avg() to compute a moving average](../functionality-overview/#using-the-aggregate-function-avg-to-compute-a-moving-average) provides an example. - -### The frame_clause - -The [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) has many variants. Only one basic variant is needed in the `OVER` clause that you use to invoke a window function. The other variants are useful in the `OVER` clause that you use to invoke an aggregate function. For completeness, those variants are described on this page. - -#### frame_clause semantics for window functions - -The [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) specifies the set of rows constituting the so-called [_window frame_](./#frame-clause-semantics-for-window-functions). In general, this will be a subset of the rows in the current [_window_](./#the-window-definition-rule). Look at the two tables at the end of the section [Signature and purpose of each window function](../function-syntax-semantics/). - -- The functions in the first group, [Window functions that return an "int" or "double precision" value as a "classifier" of the rank of the row within its window](../function-syntax-semantics/#window-functions-that-return-an-int-or-double-precision-value-as-a-classifier-of-the-rank-of-the-row-within-its-window), are not sensitive to what the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) specifies and always use all of the rows in the current [_window_](./#the-window-definition-rule). Yugabyte recommends that you therefore omit the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) in the `OVER` clause that you use to invoke these functions. - -- The functions in the second group, [Window functions that return columns of another row within the window](../function-syntax-semantics/#window-functions-that-return-column-s-of-another-row-within-the-window), make obvious sense when the scope within which the specified row is found is the entire [_window_](./#the-window-definition-rule). If you have one of the very rare use cases where the output that you want is produced by a different [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause), then specify what you want explicitly. 
Otherwise, because it isn't the default, you must specify that the [_window frame_](./#frame-clause-semantics-for-window-functions) includes the entire current [_window_](./#the-window-definition-rule) like this: - - ``` - range between unbounded preceding and unbounded following - ``` - -Use cases where the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause)'s many other variants are useful arise when an aggregate function is invoked using the `OVER` clause. One example is given in the section [Using the aggregate function `avg()` to compute a moving average](../#using-the-aggregate-function-avg-to-compute-a-moving-average). Another example, that uses `count(*)`, is given in the code that explains the meaning of the [`percent_rank()`](../function-syntax-semantics/percent-rank-cume-dist-ntile/#percent-rank) function. Otherwise, see the main [Aggregate functions](../../aggregate_functions/) section. - -#### frame_clause semantics for aggregate functions - -The [_window frame_](./#frame-clause-semantics-for-window-functions) can be specified in `RANGE`, `ROWS` or `GROUPS` mode; in each case, it runs from the [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) to the [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end). If [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end) is omitted, then the end defaults to `CURRENT ROW`. - -A [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) of `UNBOUNDED PRECEDING` means that the [_window frame_](./#frame-clause-semantics-for-window-functions) starts with the first row of the [_window_](./#the-window-definition-rule). Similarly, a [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end) of `UNBOUNDED FOLLOWING` means that the [_window frame_](./#frame-clause-semantics-for-window-functions) ends with the last row of the [_window_](./#the-window-definition-rule). - -In `RANGE` or `GROUPS` mode, a [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) of `CURRENT ROW` means that the [_window frame_](./#frame-clause-semantics-for-window-functions) starts with the first member of the current row's _peer group_. A _peer group_ is a set of rows that the window `ORDER BY` clause sorts with the same rank as the current row. And a [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end) of `CURRENT ROW` means that the [_window frame_](./#frame-clause-semantics-for-window-functions) ends with the last row in the current row's _peer group_. In `ROWS` mode, `CURRENT ROW` simply means the current row. - -For the [`offset`](../../../syntax_resources/grammar_diagrams/#offset) `PRECEDING` and [`offset`](../../../syntax_resources/grammar_diagrams/#offset) `FOLLOWING` modes of the [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) and [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end) clauses, the [`offset`](../../../syntax_resources/grammar_diagrams/#offset) argument must be an expression that doesn't include any variables, aggregate functions, or window functions. The meaning of the [`offset`](../../../syntax_resources/grammar_diagrams/#offset) value depends on the `RANGE | ROWS | GROUPS` mode: - -- In `ROWS` mode, the [`offset`](../../../syntax_resources/grammar_diagrams/#offset) value must be a `NOT NULL`, non-negative integer. 
This brings the meaning that the [_window frame_](./#frame-clause-semantics-for-window-functions) starts or ends the specified number of rows before or after the current row. - -- In `GROUPS` mode, the [`offset`](../../../syntax_resources/grammar_diagrams/#offset) value must again be a `NOT NULL`, non-negative integer. Here, this brings the meaning that the [_window frame_](./#frame-clause-semantics-for-window-functions) starts or ends the specified number of _peer groups_ before or after the current row's _peer group_. Recall that there's always a logical requirement to include a window `ORDER BY` clause in the window definition that is used to invoke a window function. In `GROUPS` mode, whatever is your intended use of the window definition, you get this error if it doesn't include a window `ORDER BY` clause: - - ``` - 42P20: GROUPS mode requires an ORDER BY clause - ``` - -- In `RANGE` mode, these options require that the window `ORDER BY` clause specify exactly one column. The [`offset`](../../../syntax_resources/grammar_diagrams/#offset) value specifies the maximum difference between the value of that column in the current row and its value in the preceding or following rows of the [_window frame_](./#frame-clause-semantics-for-window-functions). The [`offset`](../../../syntax_resources/grammar_diagrams/#offset) expression must yield a value whose data type depends upon that of the ordering column. For numeric ordering columns (like `int`, `double precision`, and so on), it is typically of the same data type as the ordering column; but for date-time ordering columns it is an `interval`. For example, if the ordering column is `date` or `timestamp`, you could specify `RANGE BETWEEN '1 day' PRECEDING AND '10 days' FOLLOWING`. Here too, the [`offset`](../../../syntax_resources/grammar_diagrams/#offset) value must be `NOT NULL` and non-negative. The meaning of “non-negative” depends on the data type. - -In all cases, the distance to the start and end of the [_window frame_](./#frame-clause-semantics-for-window-functions) is limited by the distance to the start and end of the [_window_](./#the-window-definition-rule), so that for rows near the [_window_](./#the-window-definition-rule) boundaries, the [_window frame_](./#frame-clause-semantics-for-window-functions) might contain fewer rows than elsewhere. - -Notice that in both `ROWS` and `GROUPS` mode, `0 PRECEDING` and `0 FOLLOWING` is equivalent to `CURRENT ROW`. This normally holds in `RANGE` mode too, for an appropriate meaning of “zero” specific to the data type. - -The [`frame_exclusion`](../../../syntax_resources/grammar_diagrams/#frame-exclusion) clause allows rows around the current row to be excluded from the [_window frame_](./#frame-clause-semantics-for-window-functions), even if they would be included according to what the [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) and [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end) clauses say. - -- `EXCLUDE CURRENT ROW` excludes the current row from the [_window frame_](./#frame-clause-semantics-for-window-functions). -- `EXCLUDE GROUP` excludes all the rows in the current row's _peer group_. -- `EXCLUDE TIES` excludes any peers of the current row, but not the current row itself. -- `EXCLUDE NO OTHERS` simply specifies explicitly the default behavior of not excluding the current row or its peers. 
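Here is a minimal sketch that contrasts a `ROWS`-mode frame with and without `EXCLUDE CURRENT ROW`. It assumes the data set created by the `ysqlsh` script that [table t1](../function-syntax-semantics/data-sets/table-t1/) presents; the column aliases _"s_incl"_ and _"s_excl"_ are illustrative only.

```plpgsql
-- A sketch only: "t1" is the demonstration table from the data sets section.
select
  class,
  k,
  sum(k) over (
    partition by class
    order by k
    rows between 1 preceding and 1 following)                     as s_incl,
  sum(k) over (
    partition by class
    order by k
    rows between 1 preceding and 1 following exclude current row) as s_excl
from t1
where class in (2, 4)
order by class, k;
```

For each row, _"s_excl"_ is simply _"s_incl"_ minus the current row's value of _"k"_, which makes the effect of the exclusion easy to see.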
- -Omitting the [`frame_clause`](../../../syntax_resources/grammar_diagrams/#frame-clause) is the same as specifying - -``` -RANGE UNBOUNDED PRECEDING -``` - -and this means the same as - -``` -RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -``` - -If the window `ORDER BY` clause is specified, then this default meaning sets the [_window frame_](./#frame-clause-semantics-for-window-functions) to be all rows from the [_window_](./#the-window-definition-rule) start up through the last row in the current row's _peer group_. And if the window `ORDER BY` clause is omitted this means that all rows of the [_window_](./#the-window-definition-rule) are included in the [_window frame_](./#frame-clause-semantics-for-window-functions), because all rows become peers of the current row. - -**Notes:** - -- The [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) clause cannot be `UNBOUNDED FOLLOWING`. -- The [`frame_end`](../../../syntax_resources/grammar_diagrams/#frame-end) clause cannot be `UNBOUNDED PRECEDING`, and cannot appear before the [`frame_start`](../../../syntax_resources/grammar_diagrams/#frame-start) clause. - -For example, `RANGE BETWEEN CURRENT ROW AND offset PRECEDING` causes this error: - -``` -42P20: frame starting from current row cannot have preceding rows -``` - -However, `ROWS BETWEEN 7 PRECEDING AND 8 PRECEDING` _is_ allowed, even though it would never select any rows. - -If the `FILTER` clause is specified, then only the input rows for which it evaluates to true are fed to the window function; other rows are discarded. As noted above, only window aggregate functions invoked using the `OVER` clause accept a `FILTER` clause. - -## Examples - -### First example - -This shows the use of a window function with an `OVER` clause that directly specifies the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) : - -``` -select - ... - some_window_function(...) over (partition by <expression(s)> order by <expression(s)>) as a1, - ... -from ... -``` - -Notice that the syntax spot occupied by _"some_window_function"_ may be occupied only by a window function or an aggregate function. See the section [Informal overview of function invocation using the `OVER` clause](../functionality-overview) for runnable examples of this syntax variant. - -If any other kind of function, for example _"sqrt()"_, occupies this syntax spot, then it draws this specific compilation error: - -``` -42809: OVER specified, but sqrt is not a window function nor an aggregate function -``` - -And if any other expression is used at this syntax spot, then it causes this generic compilation error: - -``` -42601: syntax error at or near "over" -``` - -### Second example - -This shows the use of two window functions with `OVER` clauses that each reference the same [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) that is defined separately in a `WINDOW` clause. - -``` -select - ... - window_fn_1(...) over w as a1, - window_fn_2(...) over w as a2, - ... -from ... -window w as ( - partition by <expression(s)> -- PARTITION BY clause - order by <expression(s)> -- ORDER BY clause - range between unbounded preceding and unbounded following -- frame_clause - ) -... -``` - -For a runnable example of this syntax variant, see [`first_value()`, `nth_value()`, `last_value()`](../function-syntax-semantics/first-value-nth-value-last-value/). - -Notice that the syntax rules allow both this: - -``` -window_fn_1(...) over w as a1 -``` - -and this: - -``` -window_fn_1(...) 
over (w) as a1 -``` - -The parentheses around the window's identifier convey no meaning. Yugabyte recommends that you don't use this form because doing so will make anybody who reads your code wonder if it _does_ convey a meaning. - -### Third example - -This shows how a generic [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) that is defined in a `WINDOW` clause is specialized in a particular `OVER` clause that references it. - -``` -select - ... - (window_fn(...) over w) as a1, - (aggregate_fn_1(...) over (w range between unbounded preceding and unbounded following)) as a2, - (aggregate_fn_1(...) over (w range between unbounded preceding and current row)) as a3, - ... -from ... -window w as (partition by <expression(s)> order by <expression(s)>) -... -``` - -### Fourth example - -This shows how the [`window_definition`](../../../syntax_resources/grammar_diagrams/#window-definition) specialization technique that the third example showed can be used successively in the `WINDOW` clause. - -``` -select - ... - (window_fn(...) over w1) as a1, - (aggregate_fn_1(...) over w2) as a2, - (aggregate_fn_1(...) over w3) as a3, - ... -from ... -window - w1 as (partition by <expression(s)> order by <expression(s)>), - w2 as (w1 range between unbounded preceding and unbounded following), - w3 as (w1 range between unbounded preceding and current row) -``` - -For a runnable example of this fourth syntax variant, see [Comparing the effect of `percent_rank()`, `cume_dist()`, and `ntile()` on the same input](../function-syntax-semantics/percent-rank-cume-dist-ntile/#comparing-the-effect-of-percent-rank-cume-dist-and-ntile-on-the-same-input). diff --git a/docs/content/preview/api/ysql/the-sql-language/_index.md b/docs/content/preview/api/ysql/the-sql-language/_index.md deleted file mode 100644 index 14ed864d777b..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/_index.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: The YugabyteDB SQL language [YSQL] -headerTitle: The YugabyteDB SQL language -linkTitle: The SQL language -description: The YugabyteDB SQL language—DDL; DML; DCL; TCL; session and system control; performance control -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: the-sql-language - parent: api-ysql - weight: 10 -aliases: - - /preview/api/ysql/with-clause/ -type: indexpage -showRightNav: true ---- - -This page describes the categorization scheme for the SQL statements and links to lists of the statements that fall into each category. It also describes notions, like the `WITH` clause, that need careful explanation and have applicability across two or more statement kinds. - -{{< note title="Under construction." >}} - -Future versions of the YSQL documentation will explain further such common notions. - -{{< /note >}} - -## Classification of SQL statements - -### Data definition language (DDL) - -**[DDL](./statements/#data-definition-language-ddl)** statements define the structures in a database, change their definitions, and remove them by using `CREATE`, `ALTER`, and `DROP` commands respectively. - -### Data manipulation language (DML) - -**[DML](./statements/#data-manipulation-language-dml)** statements query and modify the contents of a database. - -### Data control language (DCL) - -**[DCL](./statements/#data-control-language-dcl)** statements protect the definitions of database objects and the data the tables store using a regime of rules and privileges that control the scope and power of DDL and DML statements. 
- -### Transaction control language (TCL) - -**[TCL](./statements/#transaction-control-language-tcl)** statements manage transactions of operations on the database. - -### Session and system control - -**[Statements in this class](./statements/#session-and-system-control)** allow database parameters to be set at the session or the system level. - -### Performance control - -**[Statements in this class](./statements/#performance-control)** support the preparation of SQL statements, and their subsequent execution, to allow more efficient execution by _binding_ actual arguments to placeholders in a SQL statement that is compiled just once, per session, rather than _every_ time actual arguments are presented. The canonical example of this feature is provided by the actual arguments that a `WHERE` clause restriction uses or the actual values that an `INSERT` statement will use. - -In the performance control class, the [`EXPLAIN`](./statements/perf_explain/) statement shows what access methods a DML statement will use and (for statements with joins) the join order and method. - -## The WITH clause - -**[The `WITH` clause](./with-clause/)** (sometimes known as the _common table expression_) can be used as part of a `SELECT` statement, an `INSERT` statement, an `UPDATE` statement, or a `DELETE` statement. For this reason, the functionality is described in a dedicated section. diff --git a/docs/content/preview/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/_index.md b/docs/content/preview/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/_index.md deleted file mode 100644 index 4b0e6e125577..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/creating-and-using-temporary-schema-objects/_index.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Creating and using temporary schema-objects [YSQL] -headerTitle: Creating and using temporary schema-objects -linkTitle: Temporary schema-objects -description: Describes how to create temporary schema-objects of all kinds without needing the dedicated create temporary syntax. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: creating-and-using-temporary-schema-objects - parent: the-sql-language - weight: 200 -type: indexpage ---- - -A temporary schema-object can be created at any time during a session's lifetime and lasts for no longer than the session. - -{{< note title="Note" >}} -The role that creates a temporary schema-object must have the _temporary_ privilege on the current database. -{{< /note >}} - -Apart from their limited lifetime, temporary schema-objects are largely the same, semantically, as their permanent counterparts. But there are critical differences: - -- A temporary table's content is private to the session that created it. (By extension, the content of an index on a temporary table is private too.) Moreover, a temporary table uniquely supports the use of the special syntax _on commit delete rows_ (see the _[create table](../statements/ddl_create_table)_ section). - -- You can see metadata about one session's temporary objects from another session, for as long as the first session lasts. But no session except the one that created a temporary object can use it. - -Here are some scenarios where temporary schema-objects are useful. - -- Oracle Database supports a schema-object kind called _package_. A package encapsulates user-defined subprograms together with package-level global variables. 
Such variables have session duration and the values are private within a single session. But PostgreSQL, and therefore YSQL, have no _package_ construct. A one-column, one-row temporary table can be used to model a scalar package global variable; a one-column, multi-row temporary table can be used to model an array of scalars; and a multi-column, multi-row temporary table can be used to model an array of user-defined type occurrences. -- Oracle Database supports its equivalent of PostgreSQL's _prepare-and-execute_ paradigm for anonymous PL/SQL blocks as well as for regular DML statements. But PostgreSQL's _prepare_ statement supports only regular DML statements and not the _do_ statement. In Oracle Database, parameterized anonymous PL/SQL blocks are used when the encapsulated steps need to be done several times in a session, binding in different actual arguments each time, during some kind of set up flow, but never need to be done again. A temporary _[language plpgsql](../../user-defined-subprograms-and-anon-blocks/#language-plpgsql-subprograms)_ procedure in PostgreSQL, and therefore in YSQL, meets this use case perfectly. -- See the section [Porting from Oracle PL/SQL](https://www.postgresql.org/docs/15/plpgsql-porting.html) in the PostgreSQL documentation. - -Look, now, at each of the following child sections: - -- [Temporary tables, views, and sequences](./temporary-tables-views-sequences-and-indexes/) - -- [Creating temporary schema-objects of all kinds](./creating-temporary-schema-objects-of-all-kinds/) - -- [Demonstrate the globality of metadata, and the privacy of use of temporary objects](./globality-of-metadata-and-privacy-of-use-of-temp-objects/) - -- [Recommended on-demand paradigm for creating temporary objects](./on-demand-paradigm-for-creating-temporary-objects/) \ No newline at end of file diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/_index.md b/docs/content/preview/api/ysql/the-sql-language/statements/_index.md deleted file mode 100644 index 07ed68d497e6..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/_index.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: SQL statements [YSQL] -headerTitle: Categorized list of SQL statements -linkTitle: SQL statements -description: List of PostgreSQL-compatible SQL statements supported by Yugabyte SQL (YSQL) -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: statements - parent: the-sql-language - weight: 100 -aliases: - - /preview/api/ysql/commands/ -type: indexpage -showRightNav: true ---- - -The YSQL statements are compatible with the SQL dialect that PostgreSQL supports. The sidebar lists all of the YSQL statements in alphabetical order. The following tables list them by category. 
- -## Data definition language (DDL) - -| Statement | Description | -| :-------- | :---------- | -| [`ALTER DATABASE`](ddl_alter_db) | Change database definition | -| [`ALTER DOMAIN`](ddl_alter_domain) | Change domain definition | -| [`ALTER FOREIGN DATA WRAPPER`](ddl_alter_foreign_data_wrapper) | Change foreign data wrapper definition | -| [`ALTER FOREIGN TABLE`](ddl_alter_foreign_table) | Change foreign table definition | -| [`ALTER INDEX`](ddl_alter_index) | Change index definition | -| [`ALTER MATERIALIZED VIEW`](ddl_alter_matview) | Change materialized view definition | -| [`ALTER PUBLICATION`](ddl_alter_publication) | Change publication definition | -| [`ALTER SEQUENCE`](ddl_alter_sequence) | Change sequence definition | -| [`ALTER SERVER`](ddl_alter_server) | Change foreign server definition | -| [`ALTER SCHEMA`](ddl_alter_schema) | Change schema definition | -| [`ALTER TABLE`](ddl_alter_table) | Change table definition | -| [`COMMENT`](ddl_comment) | Set, update, or remove a comment on a database object | -| [`CREATE AGGREGATE`](ddl_create_aggregate) | Create an aggregate | -| [`CREATE CAST`](ddl_create_cast) | Create a cast | -| [`CREATE DATABASE`](ddl_create_database) | Create a database | -| [`CREATE DOMAIN`](ddl_create_domain) | Create a user-defined data type with optional constraints | -| [`CREATE EXTENSION`](ddl_create_extension) | Load an extension | -| [`CREATE FOREIGN DATA WRAPPER`](ddl_create_foreign_data_wrapper) | Create a foreign-data wrapper | -| [`CREATE FOREIGN TABLE`](ddl_create_foreign_table) | Create a foreign table | -| [`CREATE FUNCTION`](ddl_create_function) | Create a function | -| [`CREATE INDEX`](ddl_create_index/) | Create an index | -| [`CREATE MATERIALIZED VIEW`](ddl_create_matview) | Create a materialized view | -| [`CREATE OPERATOR`](ddl_create_operator) | Create an operator | -| [`CREATE OPERATOR CLASS`](ddl_create_operator_class) | Create an operator class | -| [`CREATE PROCEDURE`](ddl_create_procedure) | Create a procedure | -| [`CREATE PUBLICATION`](ddl_create_publication) | Create a publication | -| [`CREATE RULE`](ddl_create_rule) | Create a rule | -| [`CREATE SCHEMA`](ddl_create_schema) | Create a schema (namespace) | -| [`CREATE SEQUENCE`](ddl_create_sequence) | Create a sequence generator | -| [`CREATE SERVER`](ddl_create_server) | Create a foreign server | -| [`CREATE TABLE`](ddl_create_table) | Create an empty table | -| [`CREATE TABLE AS`](ddl_create_table_as) | Create a table from the results of executing a `SELECT` | -| [`CREATE TABLESPACE`](ddl_create_tablespace) | Create a tablespace | -| [`CREATE TRIGGER`](ddl_create_trigger) | Create a trigger | -| [`CREATE TYPE`](ddl_create_type) | Create a type | -| [`CREATE USER MAPPING`](ddl_create_user_mapping) | Create a user mapping | -| [`CREATE VIEW`](ddl_create_view) | Create a view | -| [`DROP AGGREGATE`](ddl_drop_aggregate) | Delete an aggregate | -| [`DROP CAST`](ddl_drop_cast) | Delete a cast | -| [`DROP DATABASE`](ddl_drop_database) | Delete a database from the system | -| [`DROP DOMAIN`](ddl_drop_domain) | Delete a domain | -| [`DROP EXTENSION`](ddl_drop_extension) | Delete an extension | -| [`DROP FOREIGN DATA WRAPPER`](ddl_drop_foreign_data_wrapper) | Drop a foreign-data wrapper | -| [`DROP FOREIGN TABLE`](ddl_drop_foreign_table) | Drop a foreign table | -| [`DROP FUNCTION`](ddl_drop_function) | Delete a function | -| [`DROP INDEX`](ddl_drop_index) | Delete an index from a database | -| [`DROP MATERIALIZED VIEW`](ddl_drop_matview) | Drop a materialized view | -| [`DROP 
OPERATOR`](ddl_drop_operator) | Delete an operator | -| [`DROP OPERATOR CLASS`](ddl_drop_operator_class) | Delete an operator class | -| [`DROP PROCEDURE`](ddl_drop_procedure) | Delete a procedure | -| [`DROP PUBLICATION`](ddl_drop_publication) | Delete a publication | -| [`DROP RULE`](ddl_drop_rule) | Delete a rule | -| [`DROP SCHEMA`](ddl_drop_schema) | Delete a schema from the system | -| [`DROP SEQUENCE`](ddl_drop_sequence) | Delete a sequence generator | -| [`DROP SERVER`](ddl_drop_server) | Drop a foreign server | -| [`DROP TABLE`](ddl_drop_table) | Delete a table from a database | -| [`DROP TABLESPACE`](ddl_drop_tablespace) | Delete a tablespace from the cluster | -| [`DROP TYPE`](ddl_drop_type) | Delete a user-defined type | -| [`DROP TRIGGER`](ddl_drop_trigger) | Delete a trigger | -| [`DROP VIEW`](ddl_drop_view) | Drop a view | -| [`IMPORT FOREIGN SCHEMA`](ddl_import_foreign_schema) | Import a foreign schema | -| [`REFRESH MATERIALIZED VIEW`](ddl_refresh_matview) | Refresh a materialized view | -| [`TRUNCATE`](ddl_truncate) | Clear all rows from a table | - -## Data manipulation language (DML) - -| Statement | Description | -| :------------------------ | :---------------------------------------- | -| [`CLOSE`](dml_close/) | Remove a cursor | -| [`DECLARE`](dml_declare/) | Create a cursor | -| [`DELETE`](dml_delete/) | Delete rows from a table | -| [`FETCH`](dml_fetch/) | Fetch rows from a cursor | -| [`INSERT`](dml_insert/) | Insert rows into a table | -| [`MOVE`](dml_move/) | Move the current position within a cursor | -| [`SELECT`](dml_select/) | Select rows from a table | -| [`UPDATE`](dml_update/) | Update rows in a table | - -## Data control language (DCL) - -| Statement | Description | -| :-------- | :---------- | -| [`ALTER DEFAULT PRIVILEGES`](dcl_alter_default_privileges) | Define default privileges | -| [`ALTER GROUP`](dcl_alter_group) | Alter a group | -| [`ALTER POLICY`](dcl_alter_policy) | Alter a row level security policy | -| [`ALTER ROLE`](dcl_alter_role) | Alter a role (user or group) | -| [`ALTER USER`](dcl_alter_user) | Alter a user | -| [`CREATE GROUP`](dcl_create_group) | Create a group (role) | -| [`CREATE POLICY`](dcl_create_policy) | Create a row level security policy | -| [`CREATE ROLE`](dcl_create_role) | Create a role (user or group) | -| [`CREATE USER`](dcl_create_user) | Create a user (role) | -| [`DROP GROUP`](dcl_drop_group) | Drop a group | -| [`DROP POLICY`](dcl_drop_policy) | Drop a row level security policy | -| [`DROP ROLE`](dcl_drop_role) | Drop a role (user or group) | -| [`DROP OWNED`](dcl_drop_owned) | Drop owned objects | -| [`DROP USER`](dcl_drop_user) | Drop a user | -| [`GRANT`](dcl_grant) | Grant permissions | -| [`REASSIGN OWNED`](dcl_reassign_owned) | Reassign owned objects | -| [`REVOKE`](dcl_revoke) | Revoke permissions | -| [`SET ROLE`](dcl_set_role) | Set a role | -| [`SET SESSION AUTHORIZATION`](dcl_set_session_authorization) | Set session authorization | - -## Transaction control language (TCL) - -| Statement | Description | -| :-------- | :---------- | -| [`ABORT`](txn_abort) | Roll back a transaction | -| [`BEGIN`](txn_begin/) | Start a transaction | -| [`COMMIT`](txn_commit) | Commit a transaction | -| [`END`](txn_end) | Commit a transaction | -| [`LOCK`](txn_lock) | Lock a table | -| [`ROLLBACK`](txn_rollback) | Roll back a transaction | -| [`SET CONSTRAINTS`](txn_set_constraints) | Set constraints on current transaction | -| [`SET TRANSACTION`](txn_set) | Set transaction behaviors | -| [`SHOW TRANSACTION`](txn_show) | Show 
properties of a transaction | -| [`START TRANSACTION`](txn_start) | Start a transaction | -| [`SAVEPOINT`](savepoint_create) | Create a new savepoint | -| [`ROLLBACK TO`](savepoint_rollback) | Rollback to a savepoint | -| [`RELEASE`](savepoint_release) | Release a savepoint | - -## Session and system control - -| Statement | Description | -| :-------- | :---------- | -| [`RESET`](cmd_reset) | Reset a run-time parameter to its default value | -| [`SET`](cmd_set) | Set the value of a run-time parameter | -| [`SHOW`](cmd_show) | Show the value of a run-time parameter | - -## Performance control - -| Statement | Description | -| :-------- | :---------- | -| [`DEALLOCATE`](perf_deallocate) | Deallocate a prepared statement | -| [`EXECUTE`](perf_execute) | Execute a prepared statement | -| [`EXPLAIN`](perf_explain) | Explain an execution plan for a statement | -| [`PREPARE`](perf_prepare) | Prepare a statement | - -## Streaming replication protocol statements - -| Statement | Description | -| :-------- | :---------- | -| [`CREATE_REPLICATION_SLOT`](streaming_create_repl_slot) | Create a replication slot | -| [`DROP_REPLICATION_SLOT`](streaming_drop_repl_slot) | Drop a replication slot | -| [`START_REPLICATION`](streaming_start_replication) | Start streaming from a replication slot | - -## Other statements - -| Statement | Description | -| :-------- | :---------- | -| [`ANALYZE`](cmd_analyze) | Collect statistics about a database | -| [`COPY`](cmd_copy) | Copy data between tables and files | -| [`DO`](cmd_do) | Execute an anonymous PL/pgSQL code block | diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_alter_table.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_alter_table.md deleted file mode 100644 index 8ea798550f4f..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_alter_table.md +++ /dev/null @@ -1,412 +0,0 @@ ---- -title: ALTER TABLE statement [YSQL] -headerTitle: ALTER TABLE -linkTitle: ALTER TABLE -description: Use the `ALTER TABLE` statement to change the definition of a table. -menu: - preview_api: - identifier: ddl_alter_table - parent: statements -aliases: - - /preview/api/ysql/commands/ddl_alter_table/ -type: docs ---- - -## Synopsis - -Use the `ALTER TABLE` statement to change the definition of a table. - -## Syntax - -{{%ebnf%}} - alter_table, - alter_table_action, - alter_table_constraint, - alter_column_action, - alter_column_constraint, - table_expr, - sequence_options -{{%/ebnf%}} - -
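For example, a few common forms of the statement are sketched below; the table, column, and constraint names (`employees`, `department`, `dept_not_empty`) are illustrative only and are not used elsewhere on this page.

```plpgsql
ALTER TABLE employees ADD COLUMN IF NOT EXISTS department text DEFAULT 'Sales';
ALTER TABLE employees RENAME COLUMN department TO dept;
ALTER TABLE employees ADD CONSTRAINT dept_not_empty CHECK (dept <> '');
```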

-{{< note title="Table inheritance is not yet supported" >}} - -YSQL in the present "latest" YugabyteDB does not yet support the "table inheritance" feature that is described in the [PostgreSQL documentation](https://www.postgresql.org/docs/15/ddl-inherit.html). The attempt to create a table that inherits another table causes the _0A000 (feature_not_supported)_ error with the message _"INHERITS not supported yet"_. This means that the syntax that the `table_expr` rule allows doesn't yet bring any useful meaning. - -It says that you can write, for example, this: - -```plpgsql -alter table t * add column y text; -``` - -or this: - -```plpgsql -alter table only t add column y text; -``` - -These variants are useful only when at least one other table inherits `t`. But as yet, no table can inherit `t`. This means that if the unadorned variant `alter table t...` runs without error, then each of these variants will run without error too. But the effect of each is the same as that of the unadorned variant. Until inheritance is supported, use a bare [table_name](../../../syntax_resources/grammar_diagrams/#table-name). -{{< /note >}} - -## Semantics - -### *alter_table_action* - -Specify one of the following actions. - -#### ADD [ COLUMN ] [ IF NOT EXISTS ] *column_name* *data_type* *constraint* - -Add the specified column with the specified data type and [constraint](#constraints). - -##### Table rewrites - -ADD COLUMN … DEFAULT statements require a [table rewrite](#alter-table-operations-that-involve-a-table-rewrite) when the default value is a _volatile_ expression. [Volatile expressions](https://www.postgresql.org/docs/current/xfunc-volatility.html#XFUNC-VOLATILITY) can return different results for different rows, so a table rewrite is required to fill in values for existing rows. For non-volatile expressions, no table rewrite is required. - -Examples of volatile expressions: - -- ALTER TABLE … ADD COLUMN v1 INT DEFAULT random() -- ALTER TABLE .. ADD COLUMN v2 UUID DEFAULT gen_random_uuid() - -Examples of non-volatile expressions (no table rewrite): - -- ALTER TABLE … ADD COLUMN nv1 INT DEFAULT 5 -- ALTER TABLE … ADD COLUMN nv2 timestamp DEFAULT now() -- uses the same timestamp now() for all existing rows - -#### RENAME TO *table_name* - -Rename the table to the specified table name. - -{{< note title="Note" >}} - -Renaming a table is a non blocking metadata change operation. - -{{< /note >}} - -#### SET TABLESPACE *tablespace_name* - -Asynchronously change the tablespace of an existing table. - -The tablespace change will immediately reflect in the config of the table, however the tablet move by the load balancer happens in the background. - -While the load balancer is performing the move it is perfectly safe from a correctness perspective to do reads and writes, however some query optimization that happens based on the data location may be off while data is being moved. - -##### Example - -```sql -yugabyte=# ALTER TABLE bank_transactions_eu SET TABLESPACE eu_central_1_tablespace; -``` - -```output -NOTICE: Data movement for table bank_transactions_eu is successfully initiated. -DETAIL: Data movement is a long running asynchronous process and can be monitored by checking the tablet placement in http://:7000/tables -ALTER TABLE -``` - -Tables can be moved to the default tablespace using: - -```sql -ALTER TABLE table_name SET TABLESPACE pg_default; -``` - -#### SET LOGGED | UNLOGGED - -Changes the table from unlogged to logged or vice-versa. Cannot be applied to a temporary table. 
- -Currently the *UNLOGGED* option is ignored. It's handled as *LOGGED* default persistence. - -#### SET ( *param_name* = *param_value* ) - -Change the specified storage parameter into the provided value. - -Storage parameters, [as defined by PostgreSQL](https://www.postgresql.org/docs/15/sql-createtable.html#SQL-CREATETABLE-STORAGE-PARAMETERS), are ignored and only present for compatibility with PostgreSQL. - -#### RESET ( *param_name* ) - -Reset the specified storage parameter. - -Storage parameters, [as defined by PostgreSQL](https://www.postgresql.org/docs/15/sql-createtable.html#SQL-CREATETABLE-STORAGE-PARAMETERS), are ignored and only present for compatibility with PostgreSQL. - -#### DROP [ COLUMN ] [ IF EXISTS ] *column_name* [ RESTRICT | CASCADE ] - -Drop the named column from the table. - -- `RESTRICT` — Remove only the specified column. -- `CASCADE` — Remove the specified column and any dependent objects. - -##### Example - -Set up and populate a parents-children pair of tables: - -```plpgsql -drop table if exists children cascade; -drop table if exists parents cascade; - --- The column "b" models a (natural) business unique key. -create table parents( - k int primary key, - b int not null, - v text not null, - constraint parents_b_unq unique(b)); - -create table children( - parents_b int not null, - k int not null, - v text not null, - - constraint children_pk primary key(parents_b, k), - - constraint children_fk foreign key(parents_b) - references parents(b) - match full - on delete cascade - on update restrict); - -insert into parents(k, b, v) values (1, 10, 'dog'), (2, 20, 'cat'), (3, 30, 'frog'); - -insert into children(parents_b, k, v) values - (10, 1, 'dog-child-a'), - (10, 2, 'dog-child-b'), - (10, 3, 'dog-child-c'), - (20, 1, 'cat-child-a'), - (20, 2, 'cat-child-b'), - (20, 3, 'cat-child-c'), - (30, 1, 'frog-child-a'), - (30, 2, 'frog-child-b'), - (30, 3, 'frog-child-c'); - -select p.v as "p.v", c.v as "c.v" -from parents p inner join children c on c.parents_b = p.b -order by p.b, c.k; -``` - -This is the result: - -```output - p.v | c.v -------+-------------- - dog | dog-child-a - dog | dog-child-b - dog | dog-child-c - cat | cat-child-a - cat | cat-child-b - cat | cat-child-c - frog | frog-child-a - frog | frog-child-b - frog | frog-child-c -``` - -The `\d children` meta-command shows that it has a foreign key that's a dependent object on the column `b` in the `parents` table: - -```output -Indexes: - "children_pk" PRIMARY KEY, lsm (parents_b HASH, k ASC) -Foreign-key constraints: - "children_fk" FOREIGN KEY (parents_b) REFERENCES parents(b) MATCH FULL ON UPDATE RESTRICT ON DELETE CASCADE -``` - -This is a contrived example. It is unusual practice (and normally bad practice) to make a foreign key constraint target anything but the column list upon which the parent table's primary key constraint is defined. But there are sometimes defensible reasons to do this. - -Now try to drop the column `parents.b`: - -```plpgsql -do $body$ -declare - message text not null := ''; - detail text not null := ''; -begin - -- Causes error 'cos "cascade" is required. 
- alter table parents drop column b; - assert false, 'Should not get here'; -exception - -- Error 2BP01 - when dependent_objects_still_exist then - get stacked diagnostics - message = message_text, - detail = pg_exception_detail; - assert message = 'cannot drop column b of table parents because other objects depend on it', 'Bad message'; - assert detail = 'constraint children_fk on table children depends on column b of table parents', 'Bad detail'; -end; -$body$; -``` - -It finishes without error, showing that the bare `alter table parents drop column b`, without `cascade`, fails and causes the message and hint that the code presents. Now repeat the attempt with `cascade` and observe the result: - -```plpgsql -alter table parents drop column b cascade; -``` - -It quietly succeeds. Now `\d children` shows that the foreign key constraint `children_fk` has been transitively dropped. - -#### ADD *alter_table_constraint* - -Add the specified [constraint](#constraints) to the table. - -##### Table rewrites - -Adding a `PRIMARY KEY` constraint results in a full table rewrite of the main table and all associated indexes, which can be a potentially expensive operation. For more details about table rewrites, see [Alter table operations that involve a table rewrite](#alter-table-operations-that-involve-a-table-rewrite). - -The table rewrite is needed because of how YugabyteDB stores rows and indexes. In YugabyteDB, data is distributed based on the primary key; when a table does not have an explicit primary key assigned, YugabyteDB automatically creates an internal row ID to use as the table's primary key. As a result, these rows need to be rewritten to use the newly added primary key column. For more information, refer to [Primary keys](../../../../../develop/data-modeling/primary-keys-ysql). - -#### ALTER [ COLUMN ] *column_name* [ SET DATA ] TYPE *data_type* [ COLLATE *collation* ] [ USING *expression* ] - -Change the type of an existing column. The following semantics apply: - -- If the optional `COLLATE` clause is not specified, the default collation for the new column type will be used. -- If the optional `USING` clause is not provided, the default conversion for the new column value will be the same as an assignment cast from the old type to the new type. -- A `USING` clause must be included when there is no implicit assignment cast available from the old type to the new type. -- Alter type is not supported for partitioned tables. See {{}}. -- Alter type is not supported for tables with rules (limitation inherited from PostgreSQL). -- Alter type is not supported for tables with CDC streams if a table rewrite is required. See {{}}. -- Alter type is not supported for tables under xCluster replication if a table rewrite is required. This will be supported by automatic mode in a future release. See {{}}. - -##### Table rewrites - -Altering a column's type requires a [full table rewrite](#alter-table-operations-that-involve-a-table-rewrite), and any indexes that contain this column when the underlying storage format changes or if the data changes. - -The following type changes commonly require a table rewrite: - -| From | To | Reason for table rewrite | -| ------------ | -------------- | --------------------------------------------------------------------- | -| INTEGER | TEXT | Different storage formats. | -| TEXT | INTEGER | Needs parsing and validation. | -| JSON | JSONB | Different internal representation. | -| UUID | TEXT | Different binary format. | -| BYTEA | TEXT | Different encoding. 
| -| TIMESTAMP | DATE | Loses time info; storage changes. | -| BOOLEAN | INTEGER | Different sizes and encoding. | -| REAL | NUMERIC | Different precision and format. | -| NUMERIC(p,s) | NUMERIC(p2,s2) | Requires data changes if scale is changed or if precision is smaller. | - -The following type changes do not require a rewrite when there is no associated index table on the column. When there is an associated index table on the column, a rewrite is performed on the index table alone but not on the main table. - -| From | To | Notes | -| ------------ | ------------------ | ------------------------------------------------------ | -| VARCHAR(n) | VARCHAR(m) (m > n) | Length increase is compatible. | -| VARCHAR(n) | TEXT | Always compatible. | -| SERIAL | INTEGER | Underlying type is INTEGER; usually OK. | -| NUMERIC(p,s) | NUMERIC(p2,s2) | If new precision is larger and scale remains the same. | -| CHAR(n) | CHAR(m) (m > n) | PG stores it as padded TEXT, so often fine. | -| Domain types | Their base type | Compatible, unless additional constraints exist. | - -Altering a column with a (non-trivial) USING clause always requires a rewrite. - -The table rewrite operation preserves split properties for hash-partitioned tables and hash-partitioned secondary indexes. For range-partitioned tables (and secondary indexes), split properties are only preserved if the altered column is not part of the table's (or secondary index's) range key. - -For example, the following ALTER TYPE statements would cause a table rewrite: - -- ALTER TABLE foo - ALTER COLUMN foo_timestamp TYPE timestamp with time zone - USING - timestamp with time zone 'epoch' + foo_timestamp * interval '1 second'; -- ALTER TABLE t ALTER COLUMN t_num1 TYPE NUMERIC(9,5) -- from NUMERIC(6,1); -- ALTER TABLE test ALTER COLUMN a SET DATA TYPE BIGINT USING a::BIGINT; -- from INT - -The following ALTER TYPE statement does not cause a table rewrite: - -- ALTER TABLE test ALTER COLUMN a TYPE VARCHAR(51); -- from VARCHAR(50) - -#### DROP CONSTRAINT *constraint_name* [ RESTRICT | CASCADE ] - -Drop the named constraint from the table. - -- `RESTRICT` — Remove only the specified constraint. -- `CASCADE` — Remove the specified constraint and any dependent objects. - -##### Table rewrites - -Dropping the `PRIMARY KEY` constraint results in a full table rewrite and full rewrite of all indexes associated with the table, which is a potentially expensive operation. For more details and common limitations of table rewrites, refer to [Alter table operations that involve a table rewrite](#alter-table-operations-that-involve-a-table-rewrite). - -#### RENAME [ COLUMN ] *column_name* TO *column_name* - -Rename a column to the specified name. - -#### RENAME CONSTRAINT *constraint_name* TO *constraint_name* - -Rename a constraint to the specified name. - -##### Example - -Create a table with a constraint and rename the constraint: - -```sql -CREATE TABLE test(id BIGSERIAL PRIMARY KEY, a TEXT); -ALTER TABLE test ADD constraint vague_name unique (a); -ALTER TABLE test RENAME CONSTRAINT vague_name TO unique_a_constraint; -``` - -#### ENABLE / DISABLE ROW LEVEL SECURITY - -This enables or disables row level security for the table. - -If enabled and no policies exist for the table, then a default-deny policy is applied. - -If disabled, then existing policies for the table will not be applied and will be ignored. - -See [CREATE POLICY](../dcl_create_policy) for details on how to create row level security policies. 
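For example, the following sketch enables row level security on a table and adds a policy that restricts rows to their owner; the table, column, and policy names are illustrative only:

```sql
CREATE TABLE accounts(id INT PRIMARY KEY, owner TEXT, balance NUMERIC);

ALTER TABLE accounts ENABLE ROW LEVEL SECURITY;

-- With row level security enabled and no policies defined, a default-deny policy is applied.
-- This policy lets a role see only the rows whose owner column matches its role name.
CREATE POLICY accounts_owner ON accounts
    FOR ALL
    USING (owner = current_user);

-- To stop applying the policies, disable row level security again:
ALTER TABLE accounts DISABLE ROW LEVEL SECURITY;
```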
- -#### FORCE / NO FORCE ROW LEVEL SECURITY - -This controls the application of row security policies for the table when the user is the table owner. - -If enabled, row level security policies will be applied when the user is the table owner. - -If disabled (the default) then row level security will not be applied when the user is the table owner. - -See [CREATE POLICY](../dcl_create_policy) for details on how to create row level security policies. - -### Constraints - -Specify a table or column constraint. - -#### CONSTRAINT *constraint_name* - -Specify the name of the constraint. - -#### Foreign key - -`FOREIGN KEY` and `REFERENCES` specify that the set of columns can only contain values that are present in the referenced columns of the referenced table. It is used to enforce referential integrity of data. - -#### Unique - -This enforces that the set of columns specified in the `UNIQUE` constraint are unique in the table, that is, no two rows can have the same values for the set of columns specified in the `UNIQUE` constraint. - -#### Check - -This is used to enforce that data in the specified table meets the requirements specified in the `CHECK` clause. - -#### Default - -This is used to specify a default value for the column. If an `INSERT` statement does not specify a value for the column, then the default value is used. If no default is specified for a column, then the default is NULL. - -#### Deferrable constraints - -Constraints can be deferred using the `DEFERRABLE` clause. Currently, only foreign key constraints -can be deferred in YugabyteDB. A constraint that is not deferrable will be checked after every row -within a statement. In the case of deferrable constraints, the checking of the constraint can be postponed -until the end of the transaction. - -Constraints marked as `INITIALLY IMMEDIATE` will be checked after every row within a statement. - -Constraints marked as `INITIALLY DEFERRED` will be checked at the end of the transaction. - -## Alter table operations that involve a table rewrite - -Most ALTER TABLE statements only involve a schema modification and complete quickly. However, certain specific ALTER TABLE statements require a new copy of the underlying table (and associated index tables, in some cases) to be made and can potentially take a long time, depending on the sizes of the tables and indexes involved. This is typically referred to as a "table rewrite". This behavior is [similar to PostgreSQL](https://www.crunchydata.com/blog/when-does-alter-table-require-a-rewrite), though the exact scenarios when a rewrite is triggered may differ between PostgreSQL and YugabyteDB. - -It is not safe to execute concurrent DML on the table during a table rewrite because the results of any concurrent DML are not guaranteed to be reflected in the copy of the table being made. This restriction is similar to PostgreSQL, which explicitly prevents concurrent DML during a table rewrite by acquiring an ACCESS EXCLUSIVE table lock. - -If you need to perform one of these expensive rewrites, it is recommended to combine them into a single ALTER TABLE statement to avoid multiple expensive rewrites. For example: - -```sql -ALTER TABLE t ADD COLUMN c6 UUID DEFAULT gen_random_uuid(), ALTER COLUMN c8 TYPE TEXT -``` - -The following ALTER TABLE operations involve making a full copy of the underlying table (and possibly associated index tables): - -1. [Adding](#add-alter) or [dropping](#drop-constraint-constraint-restrict-cascade) the primary key of a table. -1. 
[Adding a column with a (volatile) default value](#add-column-if-not-exists-column-data-constraint). -1. [Changing the type of a column](#alter-column-column-set-data-type-data-collate-collation-using-expression). - -## See also - -- [CREATE TABLE](../ddl_create_table) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_index.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_index.md deleted file mode 100644 index cede3fc33af8..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_index.md +++ /dev/null @@ -1,418 +0,0 @@ ---- -title: CREATE INDEX statement [YSQL] -headerTitle: CREATE INDEX -linkTitle: CREATE INDEX -description: Use the CREATE INDEX statement to create an index on the specified columns of the specified table. -menu: - preview_api: - identifier: ddl_create_index - parent: statements -type: docs ---- - -## Synopsis - -Use the CREATE INDEX statement to create an index on the specified columns of the specified table. Indexes are primarily used to improve query performance. - -In YugabyteDB, indexes are sharded - they are split into tablets and distributed across the different nodes in the cluster, just like regular tables. Index sharding is based on the primary key of the index and is independent of how the main table is sharded and distributed, except for primary key indexes, which are implemented in the main table itself. - -## Syntax - -{{%ebnf%}} - create_index, - index_elem -{{%/ebnf%}} - -## Semantics - -### Concurrent index creation - -Index creation in YugabyteDB can happen CONCURRENTLY or NONCONCURRENTLY. The default mode is CONCURRENTLY, wherever possible (see [CONCURRENTLY](#concurrently) for restrictions). - -Concurrent index creation allows data to be modified in the main table while the index is being built. It is implemented by an online index backfill process, which is a combination of a distributed index backfill process that works on existing data using parallel workers, and an online component that mirrors newer changes to main table rows into the index. Nonconcurrent index builds are not safe to perform while there are ongoing changes to the main table, however, this restriction is currently not enforced. The following table summarizes the differences in these two modes. - -| Condition | Concurrent | Nonconcurrent | -| :-------- | :--------- | :------------ | -| Safe to do other DMLs during CREATE INDEX? | yes | no | -| Keeps other transactions alive during CREATE INDEX? | mostly | no | -| Parallelizes index loading? | yes | no | - -{{< note title="Note" >}} - -For more details on how online index backfill works, refer to [Online Index Backfill](https://github.com/yugabyte/yugabyte-db/blob/master/architecture/design/online-index-backfill.md). Flags controlling the speed of online index backfill are described in [Index backfill flags](../../../../../reference/configuration/yb-tserver/#index-backfill-flags). - -{{< /note >}} - -### Colocation - -If the table is colocated, its index is also colocated; if the table is not colocated, its index is also not colocated. - -### Partitioned indexes - -Creating an index on a partitioned table automatically creates a corresponding index for every partition in the default tablespace. It's also possible to create an index on each partition individually, which you should do in the following cases: - -- Parallel writes are expected while creating the index, because concurrent builds for indexes on partitioned tables aren't supported. 
In this case, it's better to use concurrent builds to create indexes on each partition individually. -- [Row-level geo-partitioning](../../../../../explore/multi-region-deployments/row-level-geo-partitioning/) is being used. In this case, create the index separately on each partition to customize the tablespace in which each index is created. -- `CREATE INDEX CONCURRENTLY` is not supported for partitioned tables (see [CONCURRENTLY](#concurrently)). As a workaround, you can use the [ONLY](#only) keyword to create indexes on child partitions separately, as described in that section. - -### UNIQUE - -Enforce that duplicate values in a table are not allowed. - -### CONCURRENTLY - -Enable the use of online index backfill (see [Semantics](#semantics) for details), with some restrictions: - -- When creating an index on a temporary table, online schema migration is disabled. -- CREATE INDEX CONCURRENTLY is not supported for partitioned tables. -- CREATE INDEX CONCURRENTLY is not supported inside a transaction block. - -### NONCONCURRENTLY - -Disable online index backfill (see [Semantics](#semantics) for details). - -### ONLY - -Indicates not to recurse creating indexes on partitions, if the table is partitioned. The default is to recurse. - -When recursion is disabled using ONLY, the index is created in an INVALID state on only the (parent) partitioned table. To make the index valid, corresponding indexes have to be created on each of the existing partitions and attached to the parent index using `ALTER INDEX parent_index ... ATTACH PARTITION child_index`. For example: - -```sql -CREATE TABLE parent_partition(c1 int, c2 int) PARTITION BY RANGE (c1); -CREATE TABLE child_part_1 PARTITION OF parent_partition FOR VALUES FROM (0) to (100); -CREATE TABLE child_part_2 PARTITION OF parent_partition FOR VALUES FROM (101) to (200); - -CREATE INDEX parent_index ON ONLY parent_partition (c1, c2); - -\d parent_partition -``` - -```output - Table "public.parent_partition" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | -Partition key: RANGE (c1) -Indexes: - "parent_index" lsm (c1 HASH, c2 ASC) INVALID -Number of partitions: 2 (Use \d+ to list them.) -``` - -```sql -CREATE INDEX child_part_1_index ON child_part_1 (c1, c2); -CREATE INDEX child_part_2_index ON child_part_2 (c1, c2); -ALTER INDEX parent_index ATTACH PARTITION child_part_1_index; -ALTER INDEX parent_index ATTACH PARTITION child_part_2_index; - -\d parent_partition -``` - -```output - Table "public.parent_partition" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - c1 | integer | | | - c2 | integer | | | -Partition key: RANGE (c1) -Indexes: - "parent_index" lsm (c1 HASH, c2 ASC) -Number of partitions: 2 (Use \d+ to list them.) -``` - -### *access_method_name* - -The name of the index access method. By default, `lsm` is used for YugabyteDB tables and `btree` is used otherwise (for example, temporary tables). - -[GIN indexes](../../../../../explore/ysql-language-features/indexes-constraints/gin/) can be created in YugabyteDB by using the `ybgin` access method. - -### INCLUDE clause - -Specify a list of columns which will be included in the index as non-key columns. - -### TABLESPACE clause - -Specify the name of the [tablespace](../../../../../explore/going-beyond-sql/tablespaces/) that describes the placement configuration for this index. 
By default, indexes are placed in the `pg_default` tablespace, which spreads the tablets of the index evenly across the cluster. - -### WHERE clause - -A [partial index](#partial-indexes) is an index that is built on a subset of a table and includes only rows that satisfy the condition specified in the WHERE clause. - -It can be used to exclude NULL or common values from the index, or include just the rows of interest. - -This speeds up any writes to the table because rows containing the common column values don't need to be indexed. - -It also reduces the size of the index, thereby improving the speed for read queries that use the index. - -#### *name* - -Specify the name of the index to be created. - -#### *table_name* - -Specify the name of the table to be indexed. - -#### *index_elem* - -#### *column_name* - -Specify the name of a column of the table. - -#### *expression* - -Specify one or more columns of the table and must be surrounded by parentheses. - -- `HASH` - Use hash of the column. This is the default option for the first column and is used to shard the index table. -- `ASC` — Sort in ascending order. This is the default option for second and subsequent columns of the index. -- `DESC` — Sort in descending order. -- `NULLS FIRST` - Specifies that nulls sort before non-nulls. This is the default when DESC is specified. -- `NULLS LAST` - Specifies that nulls sort after non-nulls. This is the default when DESC is not specified. - -### NULLS NOT DISTINCT - -When creating an unique index, by default, NULL values are treated as distinct entries (and not equal), allowing multiple nulls in the column. The NULLS NOT DISTINCT option modifies this and causes the index to treat nulls equivalently. With this option, you can enforce that only one NULL value is permitted, aligning with use cases where NULL should represent an absence of value rather than a unique entity. - -### SPLIT INTO - -For hash-sharded indexes, you can use the SPLIT INTO clause to specify the number of tablets to be created for the index. The hash range is then evenly split across those tablets. - -Presplitting indexes, using SPLIT INTO, distributes index workloads on a production cluster. For example, if you have 3 servers, splitting the index into 30 tablets can provide higher write throughput on the index. For an example, see [Create an index specifying the number of tablets](#create-an-index-specifying-the-number-of-tablets). - -{{< note title="Note" >}} - -By default, YugabyteDB presplits an index into `ysql_num_shards_per_tserver * num_of_tserver` tablets. The SPLIT INTO clause can be used to override that setting on a per-index basis. - -{{< /note >}} - -### SPLIT AT VALUES - -For range-sharded indexes, you can use the SPLIT AT VALUES clause to set split points to presplit range-sharded indexes. - -**Example** - -```plpgsql -CREATE TABLE tbl( - a INT, - b INT, - PRIMARY KEY(a ASC, b DESC) -); - -CREATE INDEX idx1 ON tbl(b ASC, a DESC) SPLIT AT VALUES((100), (200), (200, 5)); -``` - -In the example above, there are three split points, so four tablets will be created for the index: - -- tablet 1: `b=, a=` to `b=100, a=` -- tablet 2: `b=100, a=` to `b=200, a=` -- tablet 3: `b=200, a=` to `b=200, a=5` -- tablet 4: `b=200, a=5` to `b=, a=` - -{{< note title="Note" >}} - -By default, YugabyteDB creates a range sharded index as a single tablet. The SPLIT AT clause can be used to override that setting on a per-index basis. 
- -{{< /note >}} - -## Examples - -### Unique index with HASH column ordering - -Create a unique index with hash ordered columns. - -```plpgsql -CREATE TABLE products(id int PRIMARY KEY, - name text, - code text); -CREATE UNIQUE INDEX ON products(code); -\d products -``` - -```output - Table "public.products" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - id | integer | | not null | - name | text | | | - code | text | | | -Indexes: - "products_pkey" PRIMARY KEY, lsm (id HASH) - "products_code_idx" UNIQUE, lsm (code HASH) -``` - -### ASC ordered index - -Create an index with ascending ordered key. - -```plpgsql -CREATE INDEX products_name ON products(name ASC); -\d products_name -``` - -```output - Index "public.products_name" - Column | Type | Key? | Definition ---------+------+------+------------ - name | text | yes | name -lsm, for table "public.products -``` - -### INCLUDE columns - -Create an index with ascending ordered key and include other columns as non-key columns - -```plpgsql -CREATE INDEX products_name_code ON products(name) INCLUDE (code); -\d products_name_code; -``` - -```output - Index "public.products_name_code" - Column | Type | Key? | Definition ---------+------+------+------------ - name | text | yes | name - code | text | no | code -lsm, for table "public.products" -``` - -### Create an index specifying the number of tablets - -To specify the number of tablets for an index, you can use the CREATE INDEX statement with the [SPLIT INTO](#split-into) clause. - -```plpgsql -CREATE TABLE employees (id int PRIMARY KEY, first_name TEXT, last_name TEXT) SPLIT INTO 10 TABLETS; -CREATE INDEX ON employees(first_name, last_name) SPLIT INTO 10 TABLETS; -``` - -### Partial indexes - -Consider an application maintaining shipments information. It has a `shipments` table with a column for `delivery_status`. If the application needs to access in-flight shipments frequently, then it can use a partial index to exclude rows whose shipment status is `delivered`. - -```plpgsql -CREATE TABLE shipments (id int, delivery_status text, address text, delivery_date date); -CREATE INDEX shipment_delivery ON shipments(delivery_status, address, delivery_date) WHERE delivery_status != 'delivered'; -``` - -### Expression indexes - -An index column need not be just a column of the underlying table; it can also be a function, or scalar expression computed from one or more columns of the table. You can also obtain fast access to tables based on the results of computations. - -A basic example is indexing unique emails in a users table similar to the following: - -```plpgsql -CREATE TABLE users(id BIGSERIAL PRIMARY KEY, email TEXT NOT NULL); - -CREATE UNIQUE INDEX users_email_idx ON users(lower(email)); -``` - -Creating a unique index prevents inserting duplicate email addresses using a different case. - -Note that index expressions are only evaluated at index time, so to use the index for a specific query the expression must match exactly. - -```plpgsql -SELECT * FROM users WHERE lower(email)='user@example.com'; # will use the index created above -SELECT * FROM users WHERE email='user@example.com'; # will NOT use the index -``` - -Expression indexes are often used to [index jsonb columns](../../../datatypes/type_json/create-indexes-check-constraints/). 
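As a sketch of that pattern, assuming a hypothetical `books` table with a `doc` column of type `jsonb`, an expression index on one JSON attribute might look like the following:

```plpgsql
CREATE TABLE books(id INT PRIMARY KEY, doc JSONB NOT NULL);

CREATE INDEX books_author_idx ON books((doc->>'author'));

-- A query that uses exactly the same expression can use the index:
SELECT * FROM books WHERE doc->>'author' = 'Jane Austen';
```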
- -## Troubleshooting - -If the following troubleshooting tips don't resolve your issue, ask for help in our [community Slack]({{}}) or [file a GitHub issue](https://github.com/yugabyte/yugabyte-db/issues/new?title=Index+backfill+failure). - -### Invalid index - -If online CREATE INDEX fails, an invalid index may be left behind. These indexes are not usable in queries and cause internal operations, so they should be dropped. - -For example, the following commands can create an invalid index: - -```plpgsql -CREATE TABLE uniqueerror (i int); -``` - -```output -CREATE TABLE -``` - -```plpgsql -INSERT INTO uniqueerror VALUES (1), (1); -``` - -```output -INSERT 0 2 -``` - -```plpgsql -CREATE UNIQUE INDEX ON uniqueerror (i); -``` - -```output -ERROR: ERROR: duplicate key value violates unique constraint "uniqueerror_i_idx" -``` - -```plpgsql -\d uniqueerror -``` - -```output - Table "public.uniqueerror" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - i | integer | | | -Indexes: - "uniqueerror_i_idx" UNIQUE, lsm (i HASH) INVALID -``` - -Drop the invalid index as follows: - -```plpgsql -DROP INDEX uniqueerror_i_idx; -``` - -```output -DROP INDEX -``` - -### Common errors and solutions - -- `ERROR: duplicate key value violates unique constraint "uniqueerror_i_idx"` - - **Reason**: When creating a [unique index](#unique), a unique constraint violation was found. - - **Fix**: Resolve the conflicting row(s). - -- `ERROR: Backfilling indexes { timeoutmaster_i_idx } for tablet 42e3857759f54733a47e3bb817636f60 from key '' in state kFailed` - - **Reason**: Server-side backfill timeout is repeatedly hit. - - **Fixes** - - Do any or all of the following: - - - Increase the YB-Master flag [ysql_index_backfill_rpc_timeout_ms](../../../../../reference/configuration/yb-master/#ysql-index-backfill-rpc-timeout-ms) from 60000 (one minute) to 300000 (five minutes). - - Increase the YB-TServer flag [backfill_index_timeout_grace_margin_ms](../../../../../reference/configuration/yb-tserver/#backfill-index-timeout-grace-margin-ms) from -1 (the system automatically calculates the value to be approximately 1 second) to 60000 (one minute). - - Decrease the YB-TServer flag [backfill_index_write_batch_size](../../../../../reference/configuration/yb-tserver/#backfill-index-write-batch-size) from 128 to 32. - -- `ERROR: BackfillIndex RPC (request call id 123) to 127.0.0.1:9100 timed out after 86400.000s` - - **Reason**: Client-side backfill timeout is hit. - - **Fixes** - - The master leader may have changed during backfill. This is currently [not supported][backfill-master-failover-issue]. Retry creating the index, and keep an eye on the master leader. - - Try increasing parallelism. Index backfill happens in parallel across each tablet of the table. A one-tablet table in an [RF-3][rf] setup would not take advantage of the parallelism. (One-tablet tables are default for range-partitioned tables and colocated tables.) On the other hand, no matter how much parallelism there is, a one-tablet index would be a bottleneck for index backfill writes. Partitioning could be improved with [tablet splitting][tablet-splitting]. - - In case the backfill really needs more time, increase [YB-TServer flag][yb-tserver] `backfill_index_client_rpc_timeout_ms` to as long as you expect the backfill to take (for example, one week). 
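To check whether a failed `CREATE INDEX` has left an invalid index behind, you can also query the catalog directly. The following is a plain PostgreSQL catalog query (not YugabyteDB-specific) that lists any invalid indexes in the current database:

```plpgsql
SELECT c.relname AS index_name
FROM pg_index i
JOIN pg_class c ON c.oid = i.indexrelid
WHERE NOT i.indisvalid;
```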
- -**To prioritize keeping other transactions alive** during the index backfill, set each of the following to be longer than the longest transaction anticipated: - -- [YB-Master flag][yb-master] `index_backfill_wait_for_old_txns_ms` -- YSQL parameter `yb_index_state_flags_update_delay` - -**To speed up index creation** by a few seconds when you know there will be no online writes, set the YSQL parameter `yb_index_state_flags_update_delay` to zero. - -[backfill-master-failover-issue]: https://github.com/yugabyte/yugabyte-db/issues/6218 -[rf]: ../../../../../architecture/docdb-replication/replication/#replication-factor -[tablet-splitting]: ../../../../../architecture/docdb-sharding/tablet-splitting -[yb-master]: ../../../../../reference/configuration/yb-master/ -[yb-tserver]: ../../../../../reference/configuration/yb-tserver/ diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_table.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_table.md deleted file mode 100644 index cd3da9452440..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_table.md +++ /dev/null @@ -1,361 +0,0 @@ ---- -title: CREATE TABLE [YSQL] -headerTitle: CREATE TABLE -linkTitle: CREATE TABLE -description: Use the CREATE TABLE statement to create a table in a database. -menu: - preview_api: - identifier: ddl_create_table - parent: statements -type: docs ---- - -## Synopsis - -Use the CREATE TABLE statement to create a table in a database. It defines the table name, column names and types, primary key, and table properties. - -## Syntax - -{{%ebnf%}} - create_table, - table_elem, - column_constraint, - table_constraint, - key_columns, - hash_columns, - range_columns, - storage_parameters, - storage_parameter, - index_parameters, - references_clause, - split_row, - sequence_options -{{%/ebnf%}} - -## Semantics - -Create a table with *table_name*. If `qualified_name` already exists in the specified database, an error will be raised unless the IF NOT EXISTS clause is used. - -### Primary key - -Primary key can be defined in either `column_constraint` or `table_constraint`, but not in both. - -There are two types of primary key columns: - -- `Hash primary key columns`: The primary key may have zero or more leading hash-partitioned columns. -By default, only the first column is treated as the hash-partition column. But this behavior can be modified by explicit use of the HASH annotation. - -- `Range primary key columns`: A table can have zero or more range primary key columns and it controls the top-level ordering of rows in a table (if there are no hash partition columns) or the ordering of rows among rows that share a common set of hash partitioned column values. By default, the range primary key columns are stored in ascending order. But this behavior can be controlled by explicit use of `ASC` or `DESC`. - -For example, if the primary key specification is `PRIMARY KEY ((a, b) HASH, c DESC)`, then columns `a` & `b` are used together to hash partition the table, and rows that share the same values for `a` and `b` are stored in descending order of their value for `c`. - -If the primary key specification is `PRIMARY KEY(a, b)`, then column `a` is used to hash partition the table, and rows that share the same value for `a` are stored in ascending order of their value for `b`. - -{{}} - -PostgreSQL's table storage is heap-oriented—so a table with no primary key is viable. 
However YugabyteDB's table storage is index-oriented (see [DocDB Persistence](../../../../../architecture/docdb)), so a table isn't viable without a primary key. - -Therefore, if you don't specify a primary key at table-creation time, YugabyteDB will use the internal `ybrowid` column as PRIMARY KEY and the table will be sharded on `ybrowid HASH`. - -{{}} - -### Foreign key - -FOREIGN KEY and REFERENCES specifies that the set of columns can only contain values that are present in the referenced column(s) of the referenced table. It is used to enforce referential integrity of data. - -### Unique - -This enforces that the set of columns specified in the UNIQUE constraint are unique in the table, that is, no two rows can have the same values for the set of columns specified in the UNIQUE constraint. - -### Check - -This is used to enforce that data in the specified table meets the requirements specified in the CHECK clause. - -### Default - -This clause is used to specify a default value for the column. If an INSERT statement does not specify a value for the column, then the default value is used. If no default is specified for a column, then the default is NULL. - -An identity column will automatically receive a new value produced by its linked sequence. - -### Deferrable constraints - -Constraints can be deferred using the DEFERRABLE clause. Currently, only foreign key constraints -can be deferred in YugabyteDB. A constraint that is not deferrable will be checked after every row -in a statement. In the case of deferrable constraints, the checking of the constraint can be postponed -until the end of the transaction. - -Constraints marked as INITIALLY IMMEDIATE will be checked after every row in a statement. - -Constraints marked as INITIALLY DEFERRED will be checked at the end of the transaction. - -### IDENTITY columns - -Create the column as an identity column. - -An implicit sequence will be created, attached to it, and new rows will automatically have values assigned from the sequence. IDENTITY columns are implicitly NOT NULL. - -ALWAYS and BY DEFAULT will determine how user-provided values are handled in INSERT and UPDATE statements. - -On an INSERT statement: - -- when ALWAYS is used, a user-provided value is only accepted if the INSERT statement uses OVERRIDING SYSTEM VALUE. -- when BY DEFAULT is used, then the user-provided value takes precedence. See [INSERT statement](../dml_insert/) for reference. (In the COPY statement, user-supplied values are always used regardless of this setting.) - -On an UPDATE statement: - -- when ALWAYS is used, a column update to a value other than DEFAULT will be rejected. -- when BY DEFAULT is used, the column can be updated normally. (OVERRIDING clause cannot be used for the UPDATE statement) - -The `sequence_options` optional clause can be used to override the options of the generated sequence. - -See [CREATE SEQUENCE](../ddl_create_sequence) for reference. - -#### Multiple Identity Columns - -PostgreSQL and YugabyteDB allow a table to have more than one identity column. The SQL standard specifies that a table can have at most one identity column. - -This relaxation primarily aims to provide increased flexibility for carrying out schema modifications or migrations. - -Note that the [INSERT](../dml_insert/) command can only accommodate one override clause for an entire statement. As a result, having several identity columns, each exhibiting distinct behaviours, is not effectively supported. 
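As a brief sketch of the `GENERATED ALWAYS AS IDENTITY` behavior described above (the table and column names are illustrative):

```plpgsql
CREATE TABLE orders(
    id   BIGINT GENERATED ALWAYS AS IDENTITY,
    item TEXT NOT NULL
);

-- id is assigned automatically from the implicit sequence.
INSERT INTO orders(item) VALUES ('pencil');

-- Rejected: a user-provided value is not accepted for an ALWAYS identity column.
INSERT INTO orders(id, item) VALUES (100, 'eraser');

-- Accepted: OVERRIDING SYSTEM VALUE allows the user-provided value.
INSERT INTO orders(id, item) OVERRIDING SYSTEM VALUE VALUES (100, 'eraser');
```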
- -### Stored generated columns - -A stored generated column is computed when it is written (inserted or updated) and occupies storage as if it were a normal column. A generated column cannot be written to directly. In INSERT or UPDATE commands, a value cannot be specified for a generated column, but the keyword DEFAULT may be specified. This feature is particularly useful for scenarios requiring precomputed values for indexing, sorting, or filtering, as it reduces computation overhead during queries. - -Several restrictions apply to the definition of generated columns and tables involving generated columns: - -- The generation expression can only use immutable functions and cannot use subqueries or reference anything other than the current row in any way. -- A generation expression cannot reference another generated column. -- A generation expression cannot reference a system column, except tableoid. -- A generated column cannot have a column default or an identity definition. -- A generated column cannot be part of a partition key. - -Further, for partitioned tables: - -- A generated column cannot be part of a partition key. -- If a parent column is a generated column, a child column must also be a generated column using the same expression. -- If a parent column is not a generated column, a child column may be defined to be a generated column or not. - -The following additional considerations apply to the use of generated columns: - -- Generated columns maintain access privileges separately from their underlying base columns. So, it is possible to arrange it so that a particular role can read from a generated column but not from the underlying base columns. - -- Generated columns are, conceptually, updated after BEFORE triggers have run. Therefore, changes made to base columns in a BEFORE trigger will be reflected in generated columns. But conversely, it is not allowed to access generated columns in BEFORE triggers. - -- Generated columns are skipped for logical replication and cannot be specified in a CREATE PUBLICATION column list. - -### TEMPORARY or TEMP - -Using this qualifier will create a temporary table. Temporary tables are visible only in the current client session or transaction in which they are created and are automatically dropped at the end of the session or transaction. Any indexes created on temporary tables are temporary as well. See the section [Creating and using temporary schema-objects](../../creating-and-using-temporary-schema-objects/). - -### UNLOGGED - -Currently the *UNLOGGED* option is ignored. It's handled as *LOGGED* default persistence. - -### TABLESPACE - -Specify the name of the [tablespace](../../../../../explore/going-beyond-sql/tablespaces/) that describes the placement configuration for this table. By default, tables are placed in the `pg_default` tablespace, which spreads the tablets of the table evenly across the cluster. - -### SPLIT INTO - -For hash-sharded tables, you can use the SPLIT INTO clause to specify the number of tablets to be created for the table. The hash range is then evenly split across those tablets. - -Presplitting tablets, using SPLIT INTO, distributes write and read workloads on a production cluster. For example, if you have 3 servers, splitting the table into 30 tablets can provide write throughput on the table. For an example, see [Create a table specifying the number of tablets](#create-a-table-specifying-the-number-of-tablets). 
- -{{< note title="Note" >}} - -By default, YugabyteDB presplits a table in `ysql_num_shards_per_tserver * num_of_tserver` shards. The SPLIT INTO clause can be used to override that setting on a per-table basis. - -{{< /note >}} - -### SPLIT AT VALUES - -For range-sharded tables, you can use the SPLIT AT VALUES clause to set split points to presplit range-sharded tables. - -**Example** - -```plpgsql -CREATE TABLE tbl( - a int, - b int, - primary key(a asc, b desc) -) SPLIT AT VALUES((100), (200), (200, 5)); -``` - -In the example above, there are three split points and so four tablets will be created: - -- tablet 1: `a=, b=` to `a=100, b=` -- tablet 2: `a=100, b=` to `a=200, b=` -- tablet 3: `a=200, b=` to `a=200, b=5` -- tablet 4: `a=200, b=5` to `a=, b=` - -### COLOCATION - -To create a colocated table, use the following command: - -```sql -CREATE TABLE (columns) WITH (COLOCATION = true); -``` - -In a colocated database, all tables (and their indexes) are colocated by default. To opt a specific table out of colocation, use the following command: - -```sql -CREATE TABLE (columns) WITH (COLOCATION = false); -``` - -This ensures that the table is not stored on the same tablet as the rest of the tables for this database, but instead has its own set of tablets. Use this option for large tables that need to be scaled out. - -{{}} -Setting `COLOCATION = true` has no effect if the database that the table is part of is not colocated, as currently colocation is supported only at the database level. See [Colocated tables](../../../../../additional-features/colocation/) for more details. -{{}} - -### Storage parameters - -Storage parameters, [as defined by PostgreSQL](https://www.postgresql.org/docs/15/sql-createtable.html#SQL-CREATETABLE-STORAGE-PARAMETERS), are ignored and only present for compatibility with PostgreSQL. - -### PARTITION BY - -Partitioning is another term for physically dividing large tables in YugabyteDB into smaller, more manageable tables to improve performance. See [Table partitioning](../../../../../explore/ysql-language-features/advanced-features/partitions/) for more details. - -## Examples - -### Table with primary key - -```plpgsql -yugabyte=# CREATE TABLE sample(k1 int, - k2 int, - v1 int, - v2 text, - PRIMARY KEY (k1, k2)); -``` - -In this example, the first column `k1` will be HASH, while second column `k2` will be ASC. - -```sql{.nocopy} -yugabyte=# \d sample - Table "public.sample" - Column | Type | Collation | Nullable | Default ---------+---------+-----------+----------+--------- - k1 | integer | | not null | - k2 | integer | | not null | - v1 | integer | | | - v2 | text | | | -Indexes: - "sample_pkey" PRIMARY KEY, lsm (k1 HASH, k2) -``` - -### Table with range primary key - -```plpgsql -yugabyte=# CREATE TABLE range(k1 int, - k2 int, - v1 int, - v2 text, - PRIMARY KEY (k1 ASC, k2 DESC)); -``` - -### Table with check constraint - -```plpgsql -yugabyte=# CREATE TABLE student_grade(student_id int, - class_id int, - term_id int, - grade int CHECK (grade >= 0 AND grade <= 10), - PRIMARY KEY (student_id, class_id, term_id)); -``` - -### Table with default value - -```plpgsql -yugabyte=# CREATE TABLE cars(id int PRIMARY KEY, - brand text CHECK (brand in ('X', 'Y', 'Z')), - model text NOT NULL, - color text NOT NULL DEFAULT 'WHITE' CHECK (color in ('RED', 'WHITE', 'BLUE'))); -``` - -### Table with foreign key constraint - -Define two tables with a foreign keys constraint. 
- -```plpgsql -yugabyte=# CREATE TABLE products(id int PRIMARY KEY, - descr text); -yugabyte=# CREATE TABLE orders(id int PRIMARY KEY, - pid int REFERENCES products(id) ON DELETE CASCADE, - amount int); -``` - -Insert some rows. - -```plpgsql -yugabyte=# SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL SERIALIZABLE; -yugabyte=# INSERT INTO products VALUES (1, 'Phone X'), (2, 'Tablet Z'); -yugabyte=# INSERT INTO orders VALUES (1, 1, 3), (2, 1, 3), (3, 2, 2); - -yugabyte=# SELECT o.id AS order_id, p.id as product_id, p.descr, o.amount FROM products p, orders o WHERE o.pid = p.id; -``` - -```sql{.nocopy} - order_id | product_id | descr | amount -----------+------------+----------+-------- - 1 | 1 | Phone X | 3 - 2 | 1 | Phone X | 3 - 3 | 2 | Tablet Z | 2 -(3 rows) -``` - -Inserting a row referencing a non-existent product is not allowed. - -```plpgsql -yugabyte=# INSERT INTO orders VALUES (1, 3, 3); -``` - -```sql{.nocopy} -ERROR: insert or update on table "orders" violates foreign key constraint "orders_pid_fkey" -DETAIL: Key (pid)=(3) is not present in table "products". -``` - -Deleting a product will cascade to all orders (as defined in the CREATE TABLE statement above). - -```plpgsql -yugabyte=# DELETE from products where id = 1; -yugabyte=# SELECT o.id AS order_id, p.id as product_id, p.descr, o.amount FROM products p, orders o WHERE o.pid = p.id; -``` - -```sql{.nocopy} - order_id | product_id | descr | amount -----------+------------+----------+-------- - 3 | 2 | Tablet Z | 2 -(1 row) -``` - -### Table with unique constraint - -```plpgsql -yugabyte=# CREATE TABLE translations(message_id int UNIQUE, - message_txt text); -``` - -### Create a table specifying the number of tablets - -To specify the number of tablets for a table, you can use the CREATE TABLE statement with the [SPLIT INTO](#split-into) clause. - -```plpgsql -yugabyte=# CREATE TABLE tracking (id int PRIMARY KEY) SPLIT INTO 10 TABLETS; -``` - -### Opt a table out of colocation - -```plpgsql -yugabyte=# CREATE DATABASE company WITH COLOCATION = true; - -yugabyte=# CREATE TABLE employee(id INT PRIMARY KEY, name TEXT) WITH (COLOCATION = false); -``` - -In this example, database `company` is colocated and all tables other than the `employee` table are stored on a single tablet. - -## See also - -- [ALTER TABLE](../ddl_alter_table) -- [CREATE TABLE AS](../ddl_create_table_as) -- [DROP TABLE](../ddl_drop_table) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_type.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_type.md deleted file mode 100644 index 9c4b9e97d00a..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_create_type.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -title: CREATE TYPE statement [YSQL] -headerTitle: CREATE TYPE -linkTitle: CREATE TYPE -description: Use the CREATE TYPE statement to create a user-defined type in a database. -menu: - preview_api: - identifier: ddl_create_type - parent: statements -aliases: - - /preview/api/ysql/commands/ddl_create_type/ -type: docs ---- - -## Synopsis - -Use the `CREATE TYPE` statement to create a user-defined type in a database. There are five types: composite, enumerated, range, base, and shell. Each has its own `CREATE TYPE` syntax. 
- -## Syntax - -{{%ebnf%}} - create_type, - create_composite_type, - create_enum_type, - create_range_type, - create_shell_type, - create_base_type, - composite_type_elem, - range_type_option, - base_type_option -{{%/ebnf%}} - -## Semantics - -The order of options in creating range types and base types does not matter. Even the mandatory options `SUBTYPE`, `INPUT`, and `OUTPUT` may appear in any order. - -### *create_composite_type* - -### *create_enum_type* - -### *create_range_type* - -### *create_base_type* - -### *create_shell_type* - -### *composite_type_elem* - -### *range_type_option* - -### *base_type_option* - -- `type_name` specifies the name of this user-defined type. -- `attribute_name` specifies the name of an attribute for this composite type. -- `data_type` specifies the type of an attribute for this composite type. -- `collation` specifies the collation to use for this type. In case this is a composite type, the - attribute data type must be collatable. In case this is a range type, the subtype must be - collatable. -- `label` specifies a quoted label to be a value of this enumerated type. -- `subtype` specifies the type to use for this range type. -- `subtype_operator_class` specifies the operator class to use for the subtype of this range type. -- `canonical_function` specifies the canonical function used when converting range values of this - range type to a canonical form. -- `subtype_diff_function` specifies the subtype difference function used to take the difference - between two range values of this range type. -- `input_function` specifies the function that converts this type's external textual representation - to internal representation. -- `output_function` specifies the function that converts this type's internal representation to - external textual representation. -- `receive_function` specifies the function that converts this type's external binary representation - to internal representation. -- `send_function` specifies the function that converts this type's internal representation to - external binary representation. -- `type_modifier_input_function` specifies the function that converts this type modifier's external - textual representation to internal integer typmod value or throws an error. -- `type_modifier_output_function` specifies the function that converts this type modifier's internal - integer typmod value to external representation. -- `internallength` specifies the size in bytes of this type. -- `alignment` specifies the storage alignment of this type. -- `storage` specifies the storage strategy of this type. This type must be variable length. -- `like_type` specifies the type to copy over the `INTERNALLENGTH`, `PASSEDBYVALUE`, `ALIGNMENT`, - and `STORAGE` values from. -- `category` specifies the category code for this type. -- `PREFERRED` specifies whether this type is preferred for implicit casts in the same category. -- `default` specifies the default value of this type. -- `element` Implies that the type being created is an array; this specifies the type of the array elements. -- `delimiter` specifies the character used to separate array elements in the external textual - representation of values of this type. -- `COLLATABLE` specifies whether collation information may be passed to operations that use this - type. 
- -## Examples - -Composite type - -```plpgsql -yugabyte=# CREATE TYPE feature_struct AS (id INTEGER, name TEXT); -yugabyte=# CREATE TABLE feature_tab_struct (feature_col feature_struct); -``` - -Enumerated type - -```plpgsql -yugabyte=# CREATE TYPE feature_enum AS ENUM ('one', 'two', 'three'); -yugabyte=# CREATE TABLE feature_tab_enum (feature_col feature_enum); -``` - -Range type - -```plpgsql -yugabyte=# CREATE TYPE feature_range AS RANGE (subtype=INTEGER); -yugabyte=# CREATE TABLE feature_tab_range (feature_col feature_range); -``` - -Base type - -```plpgsql -yugabyte=# CREATE TYPE int4_type; -yugabyte=# CREATE FUNCTION int4_type_in(cstring) RETURNS int4_type - LANGUAGE internal IMMUTABLE STRICT PARALLEL SAFE AS 'int4in'; -yugabyte=# CREATE FUNCTION int4_type_out(int4_type) RETURNS cstring - LANGUAGE internal IMMUTABLE STRICT PARALLEL SAFE AS 'int4out'; -yugabyte=# CREATE TYPE int4_type ( - INPUT = int4_type_in, - OUTPUT = int4_type_out, - LIKE = int4 - ); -yugabyte=# CREATE TABLE int4_table (t int4_type); -``` - -Shell type - -```plpgsql -yugabyte=# CREATE TYPE shell_type; -``` - -## See also - -- [`DROP TYPE`](../ddl_drop_type) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_index.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_index.md deleted file mode 100644 index f862810a4e1e..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_index.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: DROP INDEX statement [YSQL] -headerTitle: DROP INDEX -linkTitle: DROP INDEX -description: Use the DROP INDEX statement to remove one or more indexes from the database. -menu: - preview_api: - identifier: ddl_drop_index - parent: statements -aliases: - - /preview/api/ysql/commands/ddl_drop_index/ -type: docs ---- - -## Synopsis - -Use the `DROP INDEX` statement to remove an index from the database. - -## Syntax - -{{%ebnf%}} - drop_index -{{%/ebnf%}} - -## Semantics - -#### *if_exists* - -Under normal operation, an error is raised if the index does not exist. Adding `IF EXISTS` will quietly ignore any non-existent indexes specified. - -#### *index_name* - -Specify the name of the index to be dropped. Objects associated with the index will be invalidated after the `DROP INDEX` statement is completed. - -#### RESTRICT / CASCADE - -`RESTRICT` (the default) will not drop the index if any objects depend on it. - -`CASCADE` will drop any objects that transitively depend on the index. - -## Example - -Create a table with an index: - -```plpgsql -CREATE TABLE t1(id BIGSERIAL PRIMARY KEY, v TEXT); -CREATE INDEX i1 ON t1(v); -``` - -Verify the index was created: - -```sql -\d t1 -``` - -```output - Table "public.t1" - Column | Type | Collation | Nullable | Default ---------+--------+-----------+----------+-------------------------------- - id | bigint | | not null | nextval('t1_id_seq'::regclass) - v | text | | | -Indexes: - "t1_pkey" PRIMARY KEY, lsm (id HASH) - "i1" lsm (v HASH) -``` - -Drop the index: - -```sql -DROP INDEX i1; -``` - -Use the `\d t1` meta-command to verify that the index no longer exists. 
- -```sql -\d t1 -``` - -```output - Table "public.t1" - Column | Type | Collation | Nullable | Default ---------+--------+-----------+----------+-------------------------------- - id | bigint | | not null | nextval('t1_id_seq'::regclass) - v | text | | | -Indexes: - "t1_pkey" PRIMARY KEY, lsm (id HASH) -``` - -## See also - -- [`CREATE INDEX`](../ddl_create_index) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_table.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_table.md deleted file mode 100644 index 23c0e9271814..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_table.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: DROP TABLE statement [YSQL] -headerTitle: DROP TABLE -linkTitle: DROP TABLE -description: Use the DROP TABLE statement to remove one or more tables (with all of their data) from the database. -menu: - preview_api: - identifier: ddl_drop_table - parent: statements -aliases: - - /preview/api/ysql/commands/ddl_drop_table/ -type: docs ---- - -## Synopsis - -Use the `DROP TABLE` statement to remove one or more tables (with all of their data) from the database. - -## Syntax - -{{%ebnf%}} - drop_table -{{%/ebnf%}} - -## Semantics - -### *drop_table* - -#### *if_exists* - -Under normal operation, an error is raised if the table does not exist. Adding `IF EXISTS` will quietly ignore any non-existent tables specified. - -#### *table_name* - -Specify the name of the table to be dropped. Objects associated with the table, such as prepared statements, will eventually be invalidated after the `DROP TABLE` statement is completed. - -#### RESTRICT / CASCADE - -`RESTRICT` (the default) will not drop the table if any objects depend on it. - -`CASCADE` will drop any objects that transitively depend on the table. - -## Example - -Do this: - -```plpgsql -set client_min_messages = warning; -drop table if exists children, parents cascade; - -create table parents(k int primary key, v text); - -create table children( - k int, parents_k int, v text, - constraint children_pk primary key(k, parents_k), - constraint children_fk foreign key(parents_k) references parents(k)); -\d children -``` -The `\d` meta-command output includes this information: - -``` -Foreign-key constraints: - "children_fk" FOREIGN KEY (parents_k) REFERENCES parents(k) -``` -Now do this: - -```plpgsql -\set VERBOSITY verbose -drop table parents restrict; -``` - -It causes this error: - -``` -2BP01: cannot drop table parents because other objects depend on it -``` - -with this detail: - -``` -constraint children_fk on table children depends on table parents -``` - -Now do this: - -```plpgsql -drop table parents cascade; -\d children -``` - -The `DROP` now succeeds, and the `\d` meta-command shows that the table _"children"_ still exists but that it now has no foreign key constraint to the now-dropped _"parents"_ table.
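As a final check, the `IF EXISTS` behavior described under Semantics can be exercised in the same session. The following is just a minimal sketch; note that because `client_min_messages` was set to `warning` above, the "skipping" notice that would otherwise be reported is suppressed:

```plpgsql
-- "parents" was dropped with CASCADE above, so a plain DROP TABLE would raise an error.
-- With IF EXISTS, the statement simply succeeds (a notice, not an error, is raised).
drop table if exists parents;

-- "children" still exists, so this drops it in the normal way.
drop table if exists children;
```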
- -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_type.md b/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_type.md deleted file mode 100644 index 6082449006af..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/ddl_drop_type.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: DROP TYPE statement [YSQL] -headerTitle: DROP TYPE -linkTitle: DROP TYPE -description: Use the DROP TYPE statement to remove a user-defined type from the database. -menu: - preview_api: - identifier: ddl_drop_type - parent: statements -aliases: - - /preview/api/ysql/commands/ddl_drop_type/ -type: docs ---- - -## Synopsis - -Use the `DROP TYPE` statement to remove a user-defined type from the database. - -## Syntax - -{{%ebnf%}} - drop_type -{{%/ebnf%}} - -## Semantics - -### *drop_type* - -### *type_name* - -Specify the name of the user-defined type to drop. - -## Examples - -Simple example - -```plpgsql -yugabyte=# CREATE TYPE feature_struct AS (id INTEGER, name TEXT); -yugabyte=# DROP TYPE feature_struct; -``` - -`IF EXISTS` example - -```plpgsql -yugabyte=# DROP TYPE IF EXISTS feature_shell; -``` - -`CASCADE` example - -```plpgsql -yugabyte=# CREATE TYPE feature_enum AS ENUM ('one', 'two', 'three'); -yugabyte=# CREATE TABLE feature_tab_enum (feature_col feature_enum); -yugabyte=# DROP TYPE feature_enum CASCADE; -``` - -`RESTRICT` example - -```plpgsql -yugabyte=# CREATE TYPE feature_range AS RANGE (subtype=INTEGER); -yugabyte=# CREATE TABLE feature_tab_range (feature_col feature_range); -yugabyte=# -- The following should error: -yugabyte=# DROP TYPE feature_range RESTRICT; -yugabyte=# DROP TABLE feature_tab_range; -yugabyte=# DROP TYPE feature_range RESTRICT; -``` - -## See also - -- [`CREATE TYPE`](../ddl_create_type) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/dml_delete.md b/docs/content/preview/api/ysql/the-sql-language/statements/dml_delete.md deleted file mode 100644 index bdc641f9a2f5..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/dml_delete.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: DELETE statement [YSQL] -headerTitle: DELETE -linkTitle: DELETE -description: Use the DELETE statement to remove rows that meet certain conditions, and when conditions are not provided in the WHERE clause, all rows are deleted. -menu: - preview_api: - identifier: dml_delete - parent: statements -aliases: - - /preview/api/ysql/commands/dml_delete/ -type: docs ---- - -## Synopsis - -Use the `DELETE` statement to remove rows that meet certain conditions; when conditions are not provided in the WHERE clause, all rows are deleted. `DELETE` outputs the number of rows that are being deleted. - -## Syntax - -{{%ebnf%}} - delete, - returning_clause -{{%/ebnf%}} - -{{< note title="Table inheritance is not yet supported" >}} -The [table_expr](../../../syntax_resources/grammar_diagrams/#table-expr) rule specifies syntax that is useful only when at least one other table inherits one of the tables that the `delete` statement lists explicitly. See [this note](../ddl_alter_table#table-expr-note) for more detail. Until inheritance is supported, use a bare [table_name](../../../syntax_resources/grammar_diagrams/#table-name). 
-{{< /note >}} - -See the section [The WITH clause and common table expressions](../../with-clause/) for more information about the semantics of the `common_table_expression` grammar rule. - -## Semantics - -- While the `WHERE` clause allows a wide range of operators, the exact conditions used in the `WHERE` clause have significant performance considerations (especially for large datasets). For the best performance, use a `WHERE` clause that provides values for all columns in `PRIMARY KEY` or `INDEX KEY`. - -### *delete* - -#### WITH [ RECURSIVE ] *with_query* [ , ... ] DELETE FROM [ ONLY ] *table_name* [ * ] [ [ AS ] *alias* ] [USING from_item] [ WHERE *condition* | WHERE CURRENT OF *cursor_name* ] [ [*returning_clause*] (#returning-clause) ] - -##### *with_query* - -Specify the subqueries that are referenced by name in the DELETE statement. - -##### *table_name* - -Specify the name of the table from which rows are to be deleted. - -##### *alias* - -Specify the identifier of the target table within the DELETE statement. When an alias is specified, it must be used in place of the actual table in the statement. - -##### *from_item* - -A table expression that enables referencing columns from additional tables within the `WHERE` clause. -This follows the identical syntax conventions as the `FROM` clause in a `SELECT` statement, including the ability to define table aliases. -The target table should not be included as a `from_item` unless performing a self-join operation, in which case the target table must appear with an alias in the `from_item` list. - -### *returning_clause* - -#### RETURNING - -Specify the value to be returned. When the _output_expression_ references a column, the existing value of this column (the deleted value) is used in the evaluation. - -#### *output_name* - -## Examples - -Create a sample table, insert a few rows, then delete one of the inserted rows. - -```plpgsql -CREATE TABLE sample(k1 int, k2 int, v1 int, v2 text, PRIMARY KEY (k1, k2)); -``` - -```plpgsql -INSERT INTO sample VALUES (1, 2.0, 3, 'a'), (2, 3.0, 4, 'b'), (3, 4.0, 5, 'c'); -``` - -```plpgsql -yugabyte=# SELECT * FROM sample ORDER BY k1; -``` - -``` - k1 | k2 | v1 | v2 -----+----+----+---- - 1 | 2 | 3 | a - 2 | 3 | 4 | b - 3 | 4 | 5 | c -(3 rows) -``` - -```plpgsql -DELETE FROM sample WHERE k1 = 2 AND k2 = 3; -``` - -``` -DELETE 1 -``` - -```plpgsql -yugabyte=# SELECT * FROM sample ORDER BY k1; -``` - -``` - k1 | k2 | v1 | v2 -----+----+----+---- - 1 | 2 | 3 | a - 3 | 4 | 5 | c -(2 rows) -``` - -## See also - -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) -- [`UPDATE`](../dml_update/) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/dml_insert.md b/docs/content/preview/api/ysql/the-sql-language/statements/dml_insert.md deleted file mode 100644 index 31dc6208aee4..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/dml_insert.md +++ /dev/null @@ -1,337 +0,0 @@ ---- -title: INSERT statement [YSQL] -headerTitle: INSERT -linkTitle: INSERT -description: Use the INSERT statement to add one or more rows to the specified table. -menu: - preview_api: - identifier: dml_insert - parent: statements -type: docs ---- - -## Synopsis - -Use the INSERT statement to add one or more rows to the specified table. 
- -## Syntax - -{{%ebnf%}} - insert, - returning_clause, - column_values, - conflict_target, - conflict_action -{{%/ebnf%}} - -See the section [The WITH clause and common table expressions](../../with-clause/) for more information about the semantics of the `common_table_expression` grammar rule. - -## Semantics - -Constraints must be satisfied. - -### *insert* - -### *table_name* - -Specify the name of the table. If the specified table does not exist, an error is raised. - -### *column_names* - -Specify a comma-separated list of column names. If a specified column does not exist, an error is raised. Each of the primary key columns must have a non-null value. - -### OVERRIDING SYSTEM VALUE - -When you provide this clause, any values provided for identity columns will override the default sequence-generated values. - -If an identity column is defined as `GENERATED ALWAYS`, it is an error to insert an explicit value (other than `DEFAULT`) without specifying either the `OVERRIDING SYSTEM VALUE` or the `OVERRIDING USER VALUE` clause. (When an identity column is defined as `GENERATED BY DEFAULT`, `OVERRIDING SYSTEM VALUE` will be the normal behavior and specifying it does nothing, but YugabyteDB allows it as an extension.) - -### OVERRIDING USER VALUE - -When you provide this clause, any values provided for identity columns are ignored, and the default sequence-generated values will be applied. - -This clause is helpful when copying values between tables. - -Writing `INSERT INTO table1 OVERRIDING USER VALUE SELECT * FROM table0` copies all non-identity columns from `table0` to `table1`, while the values of the identity columns in `table1` are generated by the sequences associated with `table1`. - -### VALUES clause - -- Each values list must have the same length as the columns list. -- Each value must be convertible to its corresponding (by position) column type. -- Each value literal can be an expression. - -### ON CONFLICT clause - -- The target table must have at least one column (list) with either a unique index -or a unique constraint. We shall refer to this as a unique key. The argument of VALUES -is a relation that must include at least one of the target table's unique keys. -Some of the values of this unique key might be new, and others might already exist -in the target table. - -- The basic aim of INSERT ON CONFLICT is to insert the rows with new values of -the unique key and to update the rows with existing values of the unique key to -set the values of the remaining specified columns to those in the VALUES relation. -In this way, the net effect is either to insert or to update; and for this reason -the INSERT ON CONFLICT variant is often colloquially referred to as "upsert". - -To optimize performance, you can set the [yb_insert_on_conflict_read_batch_size](../../../../../reference/configuration/yb-tserver/#yb-insert-on-conflict-read-batch-size) configuration parameter to batch upserts. This reduces the number of network round trips required compared to performing the operations serially. - -### *returning_clause* - -### *column_values* - -### *conflict_target* - -### *conflict_action* - -```sql -DO NOTHING | DO UPDATE SET *update_item* [ , ... ] [ WHERE *condition* ] -``` - -#### *update_item* - -#### *condition* - -### Compatibility - -The SQL standard specifies that OVERRIDING SYSTEM VALUE can only be specified if an identity column that is generated always exists. YugabyteDB allows the clause in any case and ignores it if it is not applicable. 
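The identity-column rules described in this section can be illustrated with a small, self-contained sketch. The table and values here are purely illustrative and are not used by the examples that follow:

```plpgsql
-- Hypothetical table with a GENERATED ALWAYS identity column.
CREATE TABLE items(
  id int GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  descr text NOT NULL);

-- This would raise an error: an explicit value for a GENERATED ALWAYS identity
-- column requires an OVERRIDING clause.
--   INSERT INTO items(id, descr) VALUES (100, 'widget');

-- Succeeds: the explicit value 100 overrides the sequence-generated value.
INSERT INTO items(id, descr) OVERRIDING SYSTEM VALUE VALUES (100, 'widget');

-- Succeeds: the explicit value 200 is ignored and a sequence-generated id is used instead.
INSERT INTO items(id, descr) OVERRIDING USER VALUE VALUES (200, 'gadget');
```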
- -## Examples - -First, the bare insert. Create a sample table. - -```plpgsql -yugabyte=# CREATE TABLE sample(k1 int, k2 int, v1 int, v2 text, PRIMARY KEY (k1, k2)); -``` - -Insert some rows. - -```plpgsql -yugabyte=# INSERT INTO sample VALUES (1, 2.0, 3, 'a'), (2, 3.0, 4, 'b'), (3, 4.0, 5, 'c'); -``` - -Check the inserted rows. - -```plpgsql -yugabyte=# SELECT * FROM sample ORDER BY k1; -``` - -```output - k1 | k2 | v1 | v2 -----+----+----+---- - 1 | 2 | 3 | a - 2 | 3 | 4 | b - 3 | 4 | 5 | c -``` - -Next, a basic "upsert" example. Re-create and re-populate the sample table. - -```plpgsql -yugabyte=# DROP TABLE IF EXISTS sample CASCADE; -``` - -```plpgsql -yugabyte=# CREATE TABLE sample( - id int CONSTRAINT sample_id_pk PRIMARY KEY, - c1 text CONSTRAINT sample_c1_NN NOT NULL, - c2 text CONSTRAINT sample_c2_NN NOT NULL); -``` - -```plpgsql -yugabyte=# INSERT INTO sample(id, c1, c2) - VALUES (1, 'cat' , 'sparrow'), - (2, 'dog' , 'blackbird'), - (3, 'monkey' , 'thrush'); -``` - -Check the inserted rows. - -```plpgsql -yugabyte=# SELECT id, c1, c2 FROM sample ORDER BY id; -``` - -```output - id | c1 | c2 -----+--------+----------- - 1 | cat | sparrow - 2 | dog | blackbird - 3 | monkey | thrush -``` - -Demonstrate "on conflict do nothing". In this case, you don't need to specify the conflict target. - -```plpgsql -yugabyte=# INSERT INTO sample(id, c1, c2) - VALUES (3, 'horse' , 'pigeon'), - (4, 'cow' , 'robin') - ON CONFLICT - DO NOTHING; -``` - -Check the result. -The non-conflicting row with id = 4 is inserted, but the conflicting row with id = 3 is NOT updated. - -```plpgsql -yugabyte=# SELECT id, c1, c2 FROM sample ORDER BY id; -``` - -```output - id | c1 | c2 -----+--------+----------- - 1 | cat | sparrow - 2 | dog | blackbird - 3 | monkey | thrush - 4 | cow | robin -``` - -Demonstrate the real "upsert". In this case, you DO need to specify the conflict target. Notice the use of the -EXCLUDED keyword to specify the conflicting rows in the to-be-upserted relation. - -```plpgsql -yugabyte=# INSERT INTO sample(id, c1, c2) - VALUES (3, 'horse' , 'pigeon'), - (5, 'tiger' , 'starling') - ON CONFLICT (id) - DO UPDATE SET (c1, c2) = (EXCLUDED.c1, EXCLUDED.c2); - -``` - -Check the result. -The non-conflicting row with id = 5 is inserted, and the conflicting row with id = 3 is updated. - -```plpgsql -yugabyte=# SELECT id, c1, c2 FROM sample ORDER BY id; -``` - -```output - id | c1 | c2 -----+-------+----------- - 1 | cat | sparrow - 2 | dog | blackbird - 3 | horse | pigeon - 4 | cow | robin - 5 | tiger | starling -``` - -You can make the "update" happen only for a specified subset of the -excluded rows. This is illustrated by attempting to insert two conflicting rows -(with id = 4 and id = 5) and one non-conflicting row (with id = 6). -The restriction "WHERE sample.c1 <> 'tiger'" specifies that the existing row -with c1 = 'tiger' should not be updated. - -```plpgsql -INSERT INTO sample(id, c1, c2) - VALUES (4, 'deer' , 'vulture'), - (5, 'lion' , 'hawk'), - (6, 'cheeta' , 'chaffinch') - ON CONFLICT (id) - DO UPDATE SET (c1, c2) = (EXCLUDED.c1, EXCLUDED.c2) - WHERE sample.c1 <> 'tiger'; -``` - -Check the result. 
-The non-conflicting row with id = 6 is inserted; the conflicting row with id = 4 is updated; -but the conflicting row with id = 5 (and c1 = 'tiger') is NOT updated. - -```plpgsql -yugabyte=# SELECT id, c1, c2 FROM sample ORDER BY id; -``` - -```output - id | c1 | c2 -----+--------+----------- - 1 | cat | sparrow - 2 | dog | blackbird - 3 | horse | pigeon - 4 | deer | vulture - 5 | tiger | starling - 6 | cheeta | chaffinch -``` - -Notice that this restriction is legal too: - -```sql -WHERE EXCLUDED.c1 <> 'lion' -``` - -Finally, a slightly more elaborate "upsert" example. Re-create and re-populate the sample table. -Notice that id is a self-populating surrogate primary key and that c1 is a business unique key. - -```plpgsql -yugabyte=# DROP TABLE IF EXISTS sample CASCADE; -``` - -```plpgsql -CREATE TABLE sample( - id INTEGER GENERATED ALWAYS AS IDENTITY CONSTRAINT sample_id_pk PRIMARY KEY, - c1 TEXT CONSTRAINT sample_c1_NN NOT NULL CONSTRAINT sample_c1_unq unique, - c2 TEXT CONSTRAINT sample_c2_NN NOT NULL); -``` - -```plpgsql -INSERT INTO sample(c1, c2) - VALUES ('cat' , 'sparrow'), - ('deer' , 'thrush'), - ('dog' , 'blackbird'), - ('horse' , 'vulture'); -``` - -Check the inserted rows. - -```plpgsql -yugabyte=# SELECT id, c1, c2 FROM sample ORDER BY c1; -``` - -```output - id | c1 | c2 -----+-------+----------- - 1 | cat | sparrow - 2 | deer | thrush - 3 | dog | blackbird - 4 | horse | vulture -``` - -Now do the upsert. Notice that this illustrates the usefulness -of the WITH clause to define the to-be-upserted relation -before the INSERT clause and to use a subselect instead of -a VALUES clause. The conflict columns are also specified -indirectly, by mentioning the name of the unique constraint -that covers them. - -```plpgsql -yugabyte=# WITH to_be_upserted AS ( - SELECT c1, c2 FROM (VALUES - ('cat' , 'chaffinch'), - ('deer' , 'robin'), - ('lion' , 'duck'), - ('tiger' , 'pigeon') - ) - AS t(c1, c2) - ) - INSERT INTO sample(c1, c2) SELECT c1, c2 FROM to_be_upserted - ON CONFLICT ON CONSTRAINT sample_c1_unq - DO UPDATE SET c2 = EXCLUDED.c2; -``` - -Check the inserted rows. - -```plpgsql -yugabyte=# SELECT id, c1, c2 FROM sample ORDER BY c1; -``` - -```output - id | c1 | c2 -----+-------+----------- - 1 | cat | chaffinch - 2 | deer | robin - 3 | dog | blackbird - 4 | horse | vulture - 7 | lion | duck - 8 | tiger | pigeon -``` - -## See also - -- [COPY](../cmd_copy) -- [CREATE TABLE](../ddl_create_table) -- [SELECT](../dml_select/) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/dml_select.md b/docs/content/preview/api/ysql/the-sql-language/statements/dml_select.md deleted file mode 100644 index 304ab820631f..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/dml_select.md +++ /dev/null @@ -1,134 +0,0 @@ ---- -title: SELECT statement [YSQL] -headerTitle: SELECT -linkTitle: SELECT -description: Use the SELECT statement to retrieve rows of specified columns that meet a given condition from a table. -menu: - preview_api: - identifier: dml_select - parent: statements -aliases: - - /preview/api/ysql/commands/dml_select/ -type: docs ---- - -## Synopsis - -Use the `SELECT` statement to retrieve rows of specified columns that meet a given condition from a table. It specifies the columns to be retrieved, the name of the table, and the condition each selected row must satisfy. - -The same syntax rules govern a subquery, wherever you might use one—like, for example, in an [`INSERT` statement](../dml_insert/). 
Certain syntax spots, for example a `WHERE` clause predicate or the actual argument of a function like `sqrt()`, allow only a scalar subquery. - -## Syntax - -{{%ebnf%}} - select, - with_clause, - select_list, - trailing_select_clauses, - common_table_expression, - fn_over_window, - ordinary_aggregate_fn_invocation, - within_group_aggregate_fn_invocation, - grouping_element, - order_expr -{{%/ebnf%}} - -See the section [The WITH clause and common table expressions](../../with-clause/) for more information about the semantics of the `common_table_expression` grammar rule. - -## Semantics - -- An error is raised if the specified `table_name` does not exist. -- `*` represents all columns. - -While the where clause allows a wide range of operators, the exact conditions used in the where clause have significant performance considerations (especially for large datasets). - -For details on `from_item` see [SELECT](https://www.postgresql.org/docs/15/static/sql-select.html) in the PostgreSQL documentation. - -The `fn_over_window` rule denotes the special kind of `SELECT` list item that must be used to invoke a window function and that may be used to invoke an aggregate function. (Window functions are known as analytic functions in the terminology of some SQL database systems.) The dedicated diagram that follows the main diagram for the `select` rule shows the `FILTER` and the `OVER` keywords. You can see that you _cannot_ invoke a function in this way without specifying an `OVER` clause—and that the `OVER` clause requires the specification of the so-called [_window_](../../../exprs/window_functions/invocation-syntax-semantics/#the-window-definition-rule) that gives this invocation style its name. The `FILTER` clause is optional and may be used _only_ when you invoke an aggregate function in this way. All of this is explained in the [Window function invocation—SQL syntax and semantics](../../../exprs/window_functions/invocation-syntax-semantics/) section within the major section [Window functions](../../../exprs/window_functions/). - -The `ordinary_aggregate_fn_invocation` rule and the `within_group_aggregate_fn_invocation` rule denote the special kinds of `SELECT` list item that are used to invoke an aggregate function (when it isn't invoked as a window function). When an aggregate function is invoked in either of these two ways, it's very common to do so in conjunction with the `GROUP BY` and `HAVING` clauses. All of this is explained in the [Aggregate function invocation—SQL syntax and semantics](../../../exprs/aggregate_functions/invocation-syntax-semantics/) section within the major section [Aggregate functions](../../../exprs/aggregate_functions/). - -When you understand the story of the invocation of these two kinds of functions from the accounts in the [Window functions](../../../exprs/window_functions/) section and the [Aggregate functions](../../../exprs/aggregate_functions/) section, you can use the `\df` meta-command in `ysqlsh` to discover the status of a particular function, thus: - -``` -\df row_number - -... | Argument data types | Type -... +----------------------------------------+-------- -... | | window - - -\df rank - -... | Argument data types | Type -... +----------------------------------------+-------- -... | | window -... | VARIADIC "any" ORDER BY VARIADIC "any" | agg - - -\df avg - -... | Argument data types | Type -... +----------------------------------------+-------- -... | bigint | agg -... 
| | agg -``` - -- A function whose type is listed as _"window"_ can be invoked _only_ as a window function. See this account of [`row_number()`](../../../exprs/window_functions/function-syntax-semantics/row-number-rank-dense-rank/#row-number). - -- A function whose type is listed _both_ as _"window"_ and as _agg_ can be invoked: - - - _either_ as a window function using the `fn_over_window` syntax—see this account of [`rank()`](../../../exprs/window_functions/function-syntax-semantics/row-number-rank-dense-rank/#rank) - - - _or_ as a so-called [within-group hypothetical-set aggregate function](../../../exprs/aggregate_functions/function-syntax-semantics/#within-group-hypothetical-set-aggregate-functions) using the `within_group_aggregate_fn_invocation` syntax—see this account of [`rank()`](../../../exprs/aggregate_functions/function-syntax-semantics/rank-dense-rank-percent-rank-cume-dist/#rank). - -- A function whose type is listed only as _"agg"_ can, in fact, be invoked _either_ as an aggregate function using the `ordinary_aggregate_fn_invocation` syntax _or_ as a window function using the `fn_over_window` syntax. The `avg()` function is described in the "Aggregate functions" major section in the subsection [`avg()`, `count()`, `max()`, `min()`, `sum()`](../../../exprs/aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/). See its subsections [`GROUP BY` syntax](../../../exprs/aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/#group-by-syntax) and [`OVER` syntax](../../../exprs/aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/#over-syntax) for, respectively, the `ordinary_aggregate_fn_invocation` and the `fn_over_window` invocation alternatives. - -- Notice that the three functions [`mode()`](../../../exprs/aggregate_functions/function-syntax-semantics/mode-percentile-disc-percentile-cont/#mode), [`percentile_disc()`](../../../exprs/aggregate_functions/function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont), and [`percentile_cont()`](../../../exprs/aggregate_functions/function-syntax-semantics/mode-percentile-disc-percentile-cont/#percentile-disc-percentile-cont) are exceptions to this general rule (and they are the _only_ exceptions). These functions are referred to as [within-group ordered-set aggregate functions](../../../exprs/aggregate_functions/function-syntax-semantics/#within-group-ordered-set-aggregate-functions). `\df` lists the type of these functions only as _"agg"_. But these _cannot_ be invoked as window functions. The attempt causes this error: - - ``` - 42809: WITHIN GROUP is required for ordered-set aggregate mode - ``` - -**Note:** The documentation in the [Aggregate functions](../../../exprs/aggregate_functions/) major section usually refers to the syntax that the `ordinary_aggregate_fn_invocation` rule and the `within_group_aggregate_fn_invocation` rule jointly govern as the [`GROUP BY` syntax](../../../exprs/aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/#group-by-syntax) because it's these two syntax variants (and _only_ these two) can be used together with the `GROUP BY` clause (and therefore the `HAVING` clause). And it usually refers to the syntax that the `fn_over_window` rule governs as the [`OVER` syntax](../../../exprs/aggregate_functions/function-syntax-semantics/avg-count-max-min-sum/#over-syntax) because this syntax variant _requires_ the use of the `OVER` clause. 
Moreover, the use of the `GROUP BY` clause (and therefore the `HAVING` clause) is illegal with this syntax variant. - -## Examples - -Create two sample tables. - -```plpgsql -yugabyte=# CREATE TABLE sample1(k1 bigint, k2 float, v text, PRIMARY KEY (k1, k2)); -``` - -```plpgsql -yugabyte=# CREATE TABLE sample2(k1 bigint, k2 float, v text, PRIMARY KEY (k1, k2)); -``` - -Insert some rows. - -```plpgsql -yugabyte=# INSERT INTO sample1(k1, k2, v) VALUES (1, 2.5, 'abc'), (1, 3.5, 'def'), (1, 4.5, 'xyz'); -``` - -```plpgsql -yugabyte=# INSERT INTO sample2(k1, k2, v) VALUES (1, 2.5, 'foo'), (1, 4.5, 'bar'); -``` - -Select from both tables using join. - -```plpgsql -yugabyte=# SELECT a.k1, a.k2, a.v as av, b.v as bv FROM sample1 a LEFT JOIN sample2 b ON (a.k1 = b.k1 and a.k2 = b.k2) WHERE a.k1 = 1 AND a.k2 IN (2.5, 3.5) ORDER BY a.k2 DESC; -``` - -``` - k1 | k2 | av | bv -----+-----+-----+----- - 1 | 3.5 | def | - 1 | 2.5 | abc | foo -(2 rows) -``` - -## See also - -- [`CREATE TABLE`](../ddl_create_table) -- [`INSERT`](../dml_insert) diff --git a/docs/content/preview/api/ysql/the-sql-language/statements/dml_update.md b/docs/content/preview/api/ysql/the-sql-language/statements/dml_update.md deleted file mode 100644 index 6e37588addb5..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/statements/dml_update.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -title: UPDATE statement [YSQL] -headerTitle: UPDATE -linkTitle: UPDATE -description: Use UPDATE to modify values of specified columns in all rows that meet certain conditions. When conditions are not provided in WHERE clause, all rows update. -menu: - preview_api: - identifier: dml_update - parent: statements -aliases: - - /preview/api/ysql/commands/dml_update/ -type: docs ---- - -## Synopsis - -Use the `UPDATE` statement to modify the values of specified columns in all rows that meet certain conditions, and when conditions are not provided in WHERE clause, all rows are updated. `UPDATE` outputs the number of rows that are being updated. - -## Syntax - -{{%ebnf%}} - update, - returning_clause, - update_item, - column_values, - column_names -{{%/ebnf%}} - -{{< note title="Table inheritance is not yet supported" >}} -The [table_expr](../../../syntax_resources/grammar_diagrams/#table-expr) rule specifies syntax that is useful only when at least one other table inherits one of the tables that the `truncate` statement lists explicitly. See [this note](../ddl_alter_table#table-expr-note) for more detail. Until inheritance is supported, use a bare [table_name](../../../syntax_resources/grammar_diagrams/#table-name). -{{< /note >}} - -See the section [The WITH clause and common table expressions](../../with-clause/) for more information about the semantics of the `common_table_expression` grammar rule. - -## Semantics - -Updating columns that are part of an index key including PRIMARY KEY is not yet supported. - -- While the `WHERE` clause allows a wide range of operators, the exact conditions used in the where clause have significant performance considerations (especially for large datasets). For the best performance, use a `WHERE` clause that provides values for all columns in `PRIMARY KEY` or `INDEX KEY`. - -### *with_query* - -Specify the subqueries that are referenced by name in the `UPDATE` statement. - -### *table_name* - -Specify the name of the table to be updated. - -### *alias* - -Specify the identifier of the target table within the `UPDATE` statement. 
When an alias is specified, it must be used in place of the actual table in the statement. - -### *column_name* - -Specify the column in the table to be updated. - -### *expression* - -Specify the value to be assigned to a column. When the expression references a column, the old value of this column is used in the evaluation. - -### *from_item* - -A table expression that enables referencing columns from additional tables within the `WHERE` clause. -This follows the identical syntax conventions as the `FROM` clause in a `SELECT` statement, including the ability to define table aliases. -The target table should not be included as a `from_item` unless performing a self-join operation, in which case the target table must appear with an alias in the `from_item` list. - -### *output_expression* - -Specify the value to be returned. When the `output_expression` references a column, the new value of this column (the updated value) is used in the evaluation. - -### *subquery* - -Specify the SELECT subquery statement. Its selected values will be assigned to the specified columns. - -## Examples - -Create a sample table, insert a few rows, then update the inserted rows. - -```plpgsql -yugabyte=# CREATE TABLE sample(k1 int, k2 int, v1 int, v2 text, PRIMARY KEY (k1, k2)); -``` - -```plpgsql -yugabyte=# INSERT INTO sample VALUES (1, 2.0, 3, 'a'), (2, 3.0, 4, 'b'), (3, 4.0, 5, 'c'); -``` - -```plpgsql -yugabyte=# SELECT * FROM sample ORDER BY k1; -``` - -``` - k1 | k2 | v1 | v2 -----+----+----+---- - 1 | 2 | 3 | a - 2 | 3 | 4 | b - 3 | 4 | 5 | c -(3 rows) -``` - -```plpgsql -yugabyte=# UPDATE sample SET v1 = v1 + 3, v2 = '7' WHERE k1 = 2 AND k2 = 3; -``` - -``` -UPDATE 1 -``` - -```plpgsql -yugabyte=# SELECT * FROM sample ORDER BY k1; -``` - -``` - k1 | k2 | v1 | v2 -----+----+----+---- - 1 | 2 | 3 | a - 2 | 3 | 7 | 7 - 3 | 4 | 5 | c -(3 rows) -``` - -## See also - -- [`DELETE`](../dml_delete/) -- [`INSERT`](../dml_insert) -- [`SELECT`](../dml_select/) diff --git a/docs/content/preview/api/ysql/the-sql-language/with-clause/_index.md b/docs/content/preview/api/ysql/the-sql-language/with-clause/_index.md deleted file mode 100644 index 01c0ee8fa3e9..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/with-clause/_index.md +++ /dev/null @@ -1,161 +0,0 @@ ---- -title: The WITH clause and common table expressions (CTEs) [YSQL] -headerTitle: The WITH clause and common table expressions -linkTitle: WITH clause -description: How to use the WITH clause and common table expressions (CTEs) -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: with-clause - parent: the-sql-language - weight: 300 -type: indexpage -showRightNav: true ---- - -A `WITH` clause can be used as part of a `SELECT` statement, an `INSERT` statement, an `UPDATE` statement, or a `DELETE` statement. For this reason, the functionality is described in this dedicated section. - -## Introduction - -The `WITH` clause lets you name one or several so-called _common table expressions_. This latter term is a term of art, and doesn't reflect the spellings of SQL keywords. It is normally abbreviated to CTE, and this acronym will be used throughout the rest of this "WITH clause" section. See the section [WITH clause—SQL syntax and semantics](./with-clause-syntax-semantics) for the formal treatment. - -Briefly, a CTE names a SQL [sub]statement which may be any of these kinds: `SELECT`, `VALUES`, `INSERT`, `UPDATE`, or `DELETE`. 
And the `WITH` clause is legal at the start of any of these kinds of statement : `SELECT`, `INSERT`, `UPDATE`, or `DELETE`. (`VALUES` is missing from the second list.) There are two kinds of CTE: the _ordinary_ kind; and the _recursive_ kind. - -The statement text that the ordinary kind of CTE names has the same syntax as the corresponding SQL statement that you issue at top level. However, a recursive CTE may be used _only_ in a `WITH` clause. - -A CTE can, for example, be used to provide values for, say, an `INSERT` like this: - -```plpgsql -set client_min_messages = warning; -drop table if exists t1 cascade; -create table t1(k int primary key, v int not null); - -with a(k, v) as ( - select g.v, g.v*2 from generate_series(11, 20) as g(v) - ) -insert into t1 -select k, v from a; - -select k, v from t1 order by k; -``` - -This component is an example of a CTE: - -```sql -a(k, v) as (select g.v, g.v*2 from generate_series(11, 20) as g(v)) -``` - -Notice the (optional) parenthesised parameter list that follows the name, just as in the definition of a schema-level [`VIEW`](../statements/ddl_create_view). - -This is the result: - -```output - k | v -----+---- - 11 | 22 - 12 | 24 - 13 | 26 - 14 | 28 - 15 | 30 - 16 | 32 - 17 | 34 - 18 | 36 - 19 | 38 - 20 | 40 -``` - -When a data-changing [sub]statement (`INSERT`, `UPDATE` , or `DELETE`) is named in a CTE, and, when it uses a `RETURNING` clause, the returned values can be used in other CTEs and in the overall statement's final [sub]statement. Here is an example. - -```plpgsql -set client_min_messages = warning; -drop table if exists t2 cascade; -create table t2(k int primary key, v int not null); - -with moved_rows(k, v) as ( - delete from t1 - where k > 15 - returning k, v) -insert into t2(k, v) -select k, v from moved_rows; - -( - select 't1' as table_name, k, v from t1 - union all - select 't2' as table_name, k, v from t2 - ) -order by table_name, k; -``` - -This is the result: - -```output - table_name | k | v -------------+----+---- - t1 | 11 | 22 - t1 | 12 | 24 - t1 | 13 | 26 - t1 | 14 | 28 - t1 | 15 | 30 - t2 | 16 | 32 - t2 | 17 | 34 - t2 | 18 | 36 - t2 | 19 | 38 - t2 | 20 | 40 -``` - -The central notion is that each CTE that you name in a `WITH` clause can then be invoked by its name, either in a subsequent CTE in that `WITH` clause or in the overall statement's final, main, [sub]statement. In this way, a CTE is analogous, in the declarative programming domain of SQL, to a procedure or a function in the domain of an "if-then-else" programming language, bringing the modular programming benefit of hiding names, and the implementations that they stand for, from scopes that have no interest in them. - -Finally, the use of a _recursive_ CTE in a `WITH` clause enables advanced functionality, like graph analysis. For example, an _"employees"_ table often has a self-referential foreign key like _"manager_id"_ that points to the table's primary key, _"employee_id"_. `SELECT` statements that use a recursive CTE allow the reporting structure to be presented in various ways. This result shows the reporting paths of employees, in an organization with a strict hierarchical reporting scheme, in depth-first order. See the section [Pretty-printing the top-down depth-first report of paths](./emps-hierarchy/#pretty-printing-the-top-down-depth-first-report-of-paths). 
- -```output - emps hierarchy ----------------- - mary - fred - alfie - dick - george - john - alice - bill - joan - edgar - susan -``` - -## The organization of the remainder of this section - -The remainder of this section has the following subsections: - -- [`WITH` clause—SQL syntax and semantics](./with-clause-syntax-semantics/) - -- [The recursive CTE](./recursive-cte/) - -- [Case study—Using a recursive CTE to traverse an employee hierarchy](./emps-hierarchy/) - -- [Using a recursive CTE to traverse graphs of all kinds](./traversing-general-graphs/) - -- [Case study—using a recursive CTE to compute Bacon Numbers for actors listed in the IMDb](./bacon-numbers/) - -{{< tip title="Performance considerations" >}} - -A SQL statement that uses a `WITH` clause sometimes gets a worse execution plan than the semantically equivalent statement that _doesn’t_ use a `WITH` clause. The explanation is usually that a “push-down” optimization of a restriction or projection hasn’t penetrated into the `WITH` clause’s CTE. You can usually avoid this problem by manually pushing down what you’d hope would be done automatically into your spellings of the `WITH` clause’s CTEs. - -Anyway, the ordinary good practice principle holds even more here: always check the execution plans of the SQL statements that your application issues, on representative samples of data, before moving the code to the production environment. - -{{< /tip >}} - -{{< tip title="Downloadable WITH clause demonstration scripts" >}} - -The [`recursive-cte-code-examples.zip`](https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/sample/recursive-cte-code-examples/recursive-cte-code-examples.zip) file contains the `.sql` scripts that illustrate the use of the [recursive CTE](./recursive-cte/): - -- [Case study—Using a recursive CTE to traverse an employee hierarchy](./emps-hierarchy/) - -- [Using a recursive CTE to traverse graphs of all kinds](./traversing-general-graphs/) - -- [Case study—using a recursive CTE to compute Bacon Numbers for actors listed in the IMDb](./bacon-numbers/). - -All of these studies make heavy use of regular (non-recursive) CTEs. They therefore show the power of the CTE in a natural, rather than a contrived, way. - -After unzipping it in a convenient new directory, you'll see a `README.txt`. It tells you how to start, in turn, a few master-scripts. Simply start each in `ysqlsh`. You can run these time and again. Each one always finishes silently. You can see the reports that they produce on the dedicated spool directories and confirm that the files that are spooled are identical to the corresponding reference copies that are delivered in the zip-file. -{{< /tip >}} diff --git a/docs/content/preview/api/ysql/the-sql-language/with-clause/bacon-numbers/_index.md b/docs/content/preview/api/ysql/the-sql-language/with-clause/bacon-numbers/_index.md deleted file mode 100644 index 5b9528de8d9f..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/with-clause/bacon-numbers/_index.md +++ /dev/null @@ -1,233 +0,0 @@ ---- -title: > - Case study: using a recursive CTE to compute Bacon Numbers on IMDb data -headerTitle: > - Case study: using a recursive CTE to compute Bacon Numbers for actors listed in the IMDb -linkTitle: > - Case study: Bacon Numbers from IMDb -description: Case study showing how to use a recursive CTE to solve the "Six Degrees of Kevin Bacon" problem using IMDb data. 
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: bacon-numbers - parent: with-clause - weight: 60 -type: indexpage -showRightNav: true ---- - -The Bacon Numbers problem, sometimes referred to as "The Six Degrees of Kevin Bacon" (see [this Wikipedia article](https://en.wikipedia.org/wiki/Six_Degrees_of_Kevin_Bacon)), is a specific formulation of the general problem of tracing paths in an undirected cyclic graph. It is a well-known set-piece exercise in graph analysis and is a popular assignment task in computer science courses. Most frequently, solutions are implemented in an "if-then-else" language like Java. Interestingly, solutions can be implemented in SQL and, as this section will show, the amount of SQL needed is remarkably small. - -## Representing actors and movies data - -The Bacon Numbers problem is conventionally formulated in the context of the data represented in the IMDb—an acronym for Internet Movie Database. See [this Wikipedia article](https://en.wikipedia.org/wiki/IMDb). The data are freely available from IMDb but it's better, for the purposes of this section's pedagogy, to use sufficient subsets of the total IMDb content. These subsets restrict the population to the movies in which Kevin Bacon has acted and project the facts about the actors and movies to just their names. This entity relationship diagram (a.k.a. ERD) depicts the sufficient subset of the IMDb: - -![imdb-erd](/images/api/ysql/the-sql-language/with-clause/bacon-numbers/imdb-erd.jpg) - -- _each actor must act in at least one movie_ -- _each movie's cast must list at least one actor_ - -The actors are the nodes in an undirected cyclic graph. There is an edge between two actors when they have both acted together in at least one movie. - -The ERD implies the conventional three-table representation with an _"actors"_ table, a _"movies"_ table, and a _"cast_members"_ intersection table. Create them with this script: - -##### `cr-actors-movies-cast-members-tables.sql` - -```plpgsql -drop table if exists actors cascade; -drop table if exists movies cascade; -drop table if exists cast_members cascade; - -create table actors( - actor text primary key); - -create table movies(movie text primary key); - -create table cast_members( - actor text not null, - movie text not null, - - constraint cast_members_pk primary key(actor, movie), - - constraint cast_members_fk1 foreign key(actor) - references actors(actor) - match full - on delete cascade - on update restrict, - - constraint cast_members_fk2 foreign key(movie) - references movies(movie) - match full - on delete cascade - on update restrict - ); -``` - -Of course, the IMDb has facts like _date of birth_, _nationality_, and so on for the actors and like _release date_, _language_, and so on for the movies. The information would doubtless allow the _"cast_members"_ table to have columns like _"character_name"_. The data that this case study uses happen to include the movie release date, in parentheses, after the movie name in a single text field. The pedagogy is sufficiently served without parsing out these two facts into separate columns in the _"movies"_ table. - -Notice that the notion of a graph is so far only implied. A derived _"edges"_ table makes the graph explicit. An edge exists between a pair of actors if they are both on the cast list of the same one or more movies. The SQL needed to populate the _"edges"_ table from the _"cast_members"_ table is straightforward. 
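Before looking at the _"edges"_ table and the procedure that populates it, it may help to see the core of that SQL in isolation. The following is only an illustrative sketch of the self-join on _"cast_members"_; the complete `insert_edges()` procedure shown below expresses the same idea with a pair of CTEs and also inserts every edge a second time in the reverse direction:

```plpgsql
-- Sketch only: one row per unordered pair of co-actors, with the movies
-- that connect them aggregated into an array.
select
  c1.actor as node_1,
  c2.actor as node_2,
  array_agg(movie order by movie) as movies
from cast_members c1
inner join cast_members c2 using (movie)
where c1.actor < c2.actor
group by c1.actor, c2.actor;
```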
- -When the paths have been found, it's useful to be able to annotate each edge with the list of movies that are responsible for its existence. The annotation code could, of course, derive this information dynamically. But it simplifies the overall coding scheme if a denormalization is adopted to annotate the paths at the time that they are discovered. Another departure from strict purity simplifies the overall coding scheme further. If the row for the edge between a particular pair of actors records the _list_ of movies that brought it (rather than recording many edges, each with a single-valued _"movie"_ attribute), then the path-tracing code that the section [Using a recursive CTE to traverse graphs of all kinds](../traversing-general-graphs/) presented can be used "as is". To this end, the columns that represent the actor pair in the _"edges"_ table are called _"node_1"_ and "_node_2"_ rather than the more natural _"actor_1"_ and _"actor_2"_. - -**Note:** The previous paragraph was stated as something of a sketch. In fact, each edge between a pair of actors is recorded twice—once in each direction, as is described in the section [Graph traversal using the denormalized "edges" table design](../traversing-general-graphs/undirected-cyclic-graph/#graph-traversal-using-the-denormalized-edges-table-design). Each of the edges in such a pair is annotated with the same list of movies. - -This code creates the _"edges"_ table and the procedure that populates it. - -##### `cr-actors-movies-edges-table-and-proc.sql` - -```plpgsql -drop table if exists edges cascade; - -create table edges( - node_1 text, - node_2 text, - movies text[], - constraint edges_pk primary key(node_1, node_2), - constraint edges_fk_1 foreign key(node_1) references actors(actor), - constraint edges_fk_2 foreign key(node_2) references actors(actor)); - -drop procedure if exists insert_edges() cascade; - -create or replace procedure insert_edges() - language plpgsql -as $body$ -begin - delete from edges; - - with - v1(node_1, movie) as ( - select actor, movie from cast_members), - - v2(node_2, movie) as ( - select actor, movie from cast_members) - - insert into edges(node_1, node_2, movies) - select node_1, node_2, array_agg(movie order by movie) - from v1 inner join v2 using (movie) - where node_1 < node_2 - group by node_1, node_2; - - insert into edges(node_1, node_2, movies) - select node_2 as node_1, node_1 as node_2, movies - from edges; -end; -$body$; -``` - -Notice the second `INSERT` statement that re-inserts all the discovered directed edges in the reverse direction. The value of this denormalization is explained in the section [Finding the paths in a general undirected cyclic graph](../traversing-general-graphs/undirected-cyclic-graph/). - -## Create a stored procedure to decorate path edges with the list of movies that brought each edge - -The stored procedure (actually a table function) will annotate each successive edge along each path in the specified table with the list of movies that brought that edge. - -- When there are relatively few paths in all, as there are with the synthetic data that the section [Computing Bacon Numbers for a small set of synthetic actors and movies data](./synthetic-data/) uses, it's convenient simply to show all the decorated paths. 
- -- However, with a data set as big as the IMDb (even the [imdb.small.txt](http://cs.oberlin.edu/~gr151/imdb/imdb.small.txt) subset has 160 shortest paths), it's useful to be able to name a candidate actor and to annotate just the shortest path (more carefully stated, one of the shortest paths) from Kevin Bacon to the candidate. The site [The Oracle of Bacon](https://oracleofbacon.org/movielinks.php) exposes this functionality. - -The first formal parameter of the function _"decorated_paths_report()"_ is mandatory and specifies the table in which the paths are represented. The second optional formal parameter, _"terminal"_, lets you specify the last node along a path. If you omit it, the meaning is _"report all the paths"_; and it you supply it, the meaning is _"report the path to the specified actor"_. - -Dynamic SQL is therefore needed for two reasons, each of which alone is a sufficient reason: - -- The table name isn't known until run-time. - -- There may, or may not, be a `WHERE` clause. - -##### `cr-decorated-paths-report.sql` - -```plpgsql -drop function if exists decorated_paths_report(text, text) cascade; - --- This procedure is more elaborate than you'd expect because of GitHub Issue 3286. --- It says this in the report: --- --- Commit 9d66392 added support for cursor. Our next releases will have this work. --- However, there are a few pending issues. --- --- Meanwhile, this code works around the issue by using a single-row SELECT... INTO. --- This is made possible by using array_agg(). But you cannot aggregate arrays of --- different cardinalities. So a second-level workaround is used. Each array in --- the result set is cast to "text" for aggregation and then cast back to the array --- that it represents in the body of the FOREACH loop that steps through the text --- values that have been aggregated. --- --- When a "stable" release supports the use of a cursor variable, this implementation --- will be replaced by a more straightforward version. - -create function decorated_paths_report(tab in text, terminal in text default null) - returns table(t text) - language plpgsql -as $body$ -<>declare - indent constant int := 3; - q constant text := ''''; - - stmt_start constant text := 'select array_agg((path::text) '|| - 'order by cardinality(path), terminal(path), path) '|| - 'from ?'; - - where_ constant text := ' where terminal(path) = $1'; - - all_terminals_stmt constant text := replace(stmt_start, '?', tab); - one_terminal_stmt constant text := replace(stmt_start, '?', tab)||where_; - - paths text[] not null := '{}'; - p text not null := ''; - path text[] not null := '{}'; - - distance int not null := -1; - match text not null := ''; - prev_match text not null := ''; - movies text[] not null := '{}'; - movie text not null := ''; - pad int not null := 0; -begin - case terminal is null - when true then - execute all_terminals_stmt into paths; - else - execute one_terminal_stmt into paths using terminal; - end case; - - foreach p in array paths loop - path := p::text[]; - distance := cardinality(path) - 1; - match := terminal(path); - - -- Rule off before each new match. 
- case match = prev_match - when false then - t := rpad('-', 50, '-'); return next; - end case; - prev_match := match; - - pad := 0; - t := rpad(' ', pad)||path[1]; return next; - <> - for j in 2..cardinality(path) loop - select e.movies - into strict b.movies - from edges e - where e.node_1 = path[j - 1] and e.node_2 = path[j]; - - pad := pad + indent; - <> - foreach movie in array movies loop - t := rpad(' ', pad)||movie::text; return next; - end loop movies_loop; - - pad := pad + indent; - t := rpad(' ', pad)||path[j]; return next; - end loop step_loop; - end loop; - t := rpad('-', 50, '-'); return next; -end b; -$body$; -``` - -## Computing Bacon Numbers for synthetic data and the real IMDb data - -The section [Computing Bacon Numbers for a small set of synthetic actors and movies data](./synthetic-data/) demonstrates the approach using a small data set. - -The section [Computing Bacon Numbers for real IMDb data](./imdb-data/) shows how to ingest the raw `imdb.small.txt` file into the same representation that was used for the synthetic data. (The subsection [Download and ingest some IMDb data](./imdb-data/#download-and-ingest-some-imdb-data) explains how to download the IMDb subset that this case study uses.) - -While a straightforward use of a recursive CTE can be used to produce the solution for the small synthetic data set quickly, it fails to complete before crashing (see the section [Stress testing different find_paths() implementations on maximally connected graphs](../traversing-general-graphs/stress-test/)) when it's applied to the ingested `imdb.small.txt` data. The approach described in the [How to implement early path pruning](../traversing-general-graphs/undirected-cyclic-graph/#how-to-implement-early-path-pruning) section comes to the rescue. diff --git a/docs/content/preview/api/ysql/the-sql-language/with-clause/traversing-general-graphs/_index.md b/docs/content/preview/api/ysql/the-sql-language/with-clause/traversing-general-graphs/_index.md deleted file mode 100644 index 9ad4e8d07214..000000000000 --- a/docs/content/preview/api/ysql/the-sql-language/with-clause/traversing-general-graphs/_index.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -title: Using a recursive CTE to traverse a general graph -headerTitle: Using a recursive CTE to traverse graphs of all kinds -linkTitle: Traversing general graphs -description: This section shows how to use a recursive CTE to traverse graphs of all kinds. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: traversing-general-graphs - parent: with-clause - weight: 50 -type: indexpage -showRightNav: true ---- - -{{< tip title="Download the code examples" >}} - -All of the `.sql` scripts that this section presents for copy-and-paste at the ysqlsh prompt are included for download in a zip-file. - -[Download `recursive-cte-code-examples.zip`](https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/sample/recursive-cte-code-examples/recursive-cte-code-examples.zip). - -After unzipping it on a convenient new directory, you'll see a `README.txt`. It tells you how to start a couple of master-scripts. Simply start each in ysqlsh. You can run them time and again. They always finish silently. You can see the reports that they produce on dedicated spool directories and confirm that your reports are identical to the reference copies that are delivered in the zip-file. -{{< /tip >}} - -A _graph_ is a network of _nodes_ (sometimes called vertices) and _edges_ (sometimes called arcs). An edge joins a pair of nodes. 
- -- When every edge is _undirected_ (i.e. the relationship between the nodes at each end of an edge is symmetrical), then the graph is called an _undirected_ graph. -- When every edge is _directed_, then the graph is called a _directed_ graph. -- One representation scheme for an _undirected_ edge chooses to represent this single abstract edge as two physical edges, one in each direction. In this sense, you can think of a general _undirected_ graph as a _directed_ graph where at least one pair of nodes is connected by a pair of edges, one in each direction. By extension of this thinking, a _directed_ graph is one where there is maximum one _directed_ edge between any pair of nodes. -- Most commonly (by virtue of the nature of the inter-node relationship) a graph has either only _undirected_ edges or only _directed_ edges. -- A _path_ is a traversal of the graph that starts at one node, goes along an edge to another node, and then along an edge to yet another node, and so on. You can certainly give meaning to a path that returns many times to nodes that have already been encountered. This is called a _cycle_. But, by convention, this is not done because an algorithm that discovered such a path would run for ever. Rather, it's usual that the definition of a path includes the notion that it has no cycles, so that no node is visited more than once. (A cycle that runs round and round between a pair of immediately connected nodes is always discounted.) -- A graph that has the potential for cycles is called a _cyclic_ graph. And one with no such potential is called an _acyclic_ graph. -- The two degrees of freedom, _undirected_ or _directed_ and _cyclic_ or _acyclic_ are orthogonal—i.e. all four combinations are possible. -- The most general kind of graph is _undirected_ and _cyclic_ and the design of the traversal scheme must account for this. This general scheme will always work on the more specialized kinds of graph. -- A graph might be just _a single set of connected nodes_, where every node can be reached from any other node; or it might be two or more isolated _subgraphs_ of mutually connected nodes where there exists no path between any pair of subgraphs. -- The most general graph traversal scheme, therefore, must discover all the isolated _subgraphs_ and apply the required traversal scheme to each of them. This is beyond the scope of this overall section. It deals only with single connected graphs. - -You can use a recursive CTE to find the paths in an _undirected_ _cyclic_ graph. But you must design the SQL explicitly to accommodate the fact that the edges are _undirected_ and you must include an explicit predicate to prevent cycles. Because each other kind of graph described below is a specialization of the graph whose description immediately precedes its description, you can, as mentioned, use progressively simpler SQL to trace paths in these—as long as you know, _a priori_, what kind of graph you're dealing with. - -## Undirected cyclic graph - -Here is an example of such a graph. - -![undirected-cyclic-graph](/images/api/ysql/the-sql-language/with-clause/traversing-general-graphs/undirected-cyclic-graph.jpg) - -The [Bacon Numbers problem](../bacon-numbers/) is specified in the context of this kind of graph. Actors have acted in one or more movies. And the cast of a movie is one or more actors. The set of actors of interest is represented as the nodes of a single connected graph, one of whose nodes is Kevin Bacon. 
When any pair of actors have acted in the same movie, an edge exists between the two of them. - -- Setting aside notions like "starring role", "supporting role", and so on, the relationship between a pair of actors who acted in the same movie is symmetrical. So the graph is _undirected_. - -- Because, for example, John Malkovich, Brad Pitt, and Winona Ryder all acted in the 1999 movie "Being John Malkovich", you could traverse a path from John Malkovich to Brad Pitt to Winona Ryder and back to John Malkovich, and so on indefinitely. Or you could traverse a path from John Malkovich to Winona Ryder to Brad Pitt and back to John Malkovich, and so on indefinitely. So the graph, in general, is _undirected_ _and_ _cyclic_. - -The movies-and-actors use case brings out another point. In general, the edges have properties—in this case the list of movies in which a particular pair of actors have both acted. - -You might argue (particularly if you're starting to think of a representation in a SQL database) that the properties of an edge must be single-valued and that there should therefore be many edges between a pair of actors who've been in several movies in common—one for each movie. This is just an example of the usual distinction between the conceptual design (the entity-relationship model) and the logical design (the table model). The basic graph traversal problem is, beyond doubt, best conceptualized in terms of a model that allows just zero or one edge between node pairs. Even so, the traversal implementation can easily accommodate a physical model that allows more than one edge between node pairs. - -## Directed cyclic graph - -A _directed_ _cyclic_ graph is a specialization of the _undirected_ _cyclic_ graph. Here, the relationship between the nodes at the two ends of an edge is asymmetrical, so each edge has a direction. - -![directed-cyclic-graph](/images/api/ysql/the-sql-language/with-clause/traversing-general-graphs/directed-cyclic-graph.jpg) - -For example, you might keep a record of all the video-conferences that are held among the employees in an organization. A conference has exactly one _host attendee_ and one or many _invited attendees_. Alice might host several conferences at which Joe attends as an invitee. And Joe might host several conferences at which Alice attends as an invitee. The graph will therefore have two _directed_ edges between Alice and Joe (as is shown between the nodes _n4_ and _n6_ in the picture above). The property of the edge from Alice to Joe could include the list of start timestamps and durations of the conferences that Alice hosted and that Joe attended. And the property of the edge from Joe to Alice would then include the list of start timestamps and durations of the conferences that Joe hosted and that Alice attended. - -## Directed acyclic graph - -A _directed_ _acyclic_ graph is a specialization of the _directed_ _cyclic_ graph. - -![directed-acyclic-graph](/images/api/ysql/the-sql-language/with-clause/traversing-general-graphs/directed-acyclic-graph.jpg) - -A car manufacturer will record the parts decomposition of each model that it makes. Cars have major components like engines, brakes, exhaust systems, and so on. And many car models (especially, for example, the sedan and wagon variants of a particular marque) will share the same major components. The relationship "is composed of" is clearly asymmetrical.
Major components, of course, have their own parts breakdowns into subcomponents, as do the subcomponents in turn all the way down to atomic subcomponents like nuts, bolts, washers, and so on. - -## Rooted tree - -A rooted tree is a specialization of the _directed_ _acyclic_ graph. - -![rooted-tree](/images/api/ysql/the-sql-language/with-clause/traversing-general-graphs/rooted-tree.jpg) - -The reporting tree for employees in an organization, whose traversal was discussed in the section [Case study—Using a recursive CTE to traverse an employee hierarchy](../emps-hierarchy/) is the canonical example of such a graph. - -## Finding the paths in each of the four kinds of graph - -The next sections show how to find the paths in each of the four kinds of graph that are described above. - -### Representing the different kinds of graph in a SQL database - -[This section](./graph-representation/) describes the minimal table structure for representing graphs. - -### Common code for traversing all kinds of graph - -[This section](./common-code/) describes common artifacts, like tables into which to insert the results to support subsequent _ad hoc_ queries, and various helper functions and procedures, upon which the described traversal schemes all depend. - -### Path finding approaches - -The method for the _directed_ _cyclic_ graph is identical to that for the _undirected_ _cyclic_ graph. Each implements cycle prevention. But the table for the _directed_ _cyclic_ graph usually records just the edge as either _"from a to b"_ or _"from b to a"_. Using the recommended representation for edges in a SQL database table, this is the difference: - -- The table for the _undirected_ _cyclic_ graph for the edge between nodes _"a"_ and _"b"_ records it twice, as _"from a to b"_ and _"from b to a"_. - -- But the table for the _directed_ _cyclic_ graph usually records just the edge as either _"from a to b"_ or _"from b to a"_. Occasionally, when the _directed_ relationship between a particular pair of nodes is reciprocal, two edges will be recorded as _"from a to b"_ and _"from b to a"_. - -The method for the _directed_ _acyclic_ graph is identical to that for the rooted tree. Neither implements cycle prevention because there is no need for this (both these kinds of graph, by definition, have no cycles). For the same reason, there is never more than one _directed_ edge between any pair of nodes. - -The methods are described in these sections: - -- [Finding the paths in a general undirected cyclic graph](./undirected-cyclic-graph/) - -- [Finding the paths in a directed cyclic graph](./directed-cyclic-graph/) - -- [Finding the paths in a directed acyclic graph](./directed-acyclic-graph/) - -- [Finding the paths in a rooted tree](./rooted-tree/) diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/_index.md deleted file mode 100644 index d7a910628d86..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/_index.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: User-defined subprograms and anonymous blocks [YSQL] -headerTitle: User-defined subprograms and anonymous blocks—"language SQL" and "language plpgsql" -linkTitle: User-defined subprograms and anonymous blocks -description: Describes how YSQL supports user-defined subprograms and anonymous blocks implemented in SQL and PL/pgSQL. 
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: user-defined-subprograms-and-anon-blocks - parent: api-ysql - weight: 70 -type: indexpage -showRightNav: true ---- - -This topic area is often referred to as "stored procedures". This is definitely sloppy because, for example: - -- Anonymous blocks are not stored. -- The distinct SQL keywords _procedure_ and _function_ express different meanings. -- You often hear "stored procedures" used interchangeably with "stored procedures and triggers". But triggers are their own distinct phenomenon and this major section doesn't mention them. - -Moreover, SQL and PL/pgSQL are not the only implementation languages. - -This is why the precise, but more longwinded, wording is used for this major section's title and in the explanations that it presents. Nevertheless, when users say "stored procedures" in an informal context, there is very rarely any confusion. - -{{< note title="This major section describes only user-defined subprograms and anonymous blocks that are implemented in SQL or PL/pgSQL." >}} -A subsection that describes user-defined subprograms that are implemented using C will be added in a later version of this major section. -{{< /note >}} - -{{< tip title="Always immediately revoke 'execute' from any newly-created subprogram." >}} -For historical reasons, _execute_ is implicitly granted to _public_ on a newly-created subprogram. You must therefore follow every _create function s.f()_ and _create procedure s.p()_ statement with something like this: - -```output -revoke all on function s.f() from public; -``` - -and then, maybe later on during the code installation flow, something like this: - -```output -grant execute on function s.f() to ... -``` -{{< /tip >}} - -## User-defined subprograms - -YSQL supports user-defined functions and user-defined procedures. Each of the words _function_ and _procedure_ is a YSQL keyword. The term of art _subprogram_ will be used as an umbrella term that denotes either a function or a procedure. It is not a YSQL keyword. A user-defined subprogram has an owner, a name, and lives in a schema. Its source code definition, and its various attributes, are persisted in the catalog. - -Not every programming language distinguishes between functions and procedures with different keywords. But the distinction between the two kinds of subprogram is the same in [PostgreSQL](https://www.yugabyte.com/postgresql/) and YSQL as it is in other languages. - -{{< note title="The YSQL documentation uses the term 'subprogram' where the PostgreSQL documentation uses 'routine'." >}} -The two terms of art, _subprogram_ and _routine_, are used when writing about programming to mean the same thing. The YSQL documentation uses _subprogram_; and the PostgreSQL documentation uses _routine_). Some SQL statements support the keyword _routine_ where they accept either _function_ or _procedure_—for example: - -```output -grant execute on routine s.f(int) to r; -drop routine s.f(int); -``` - -Other statements do not allow this. For example, there is no _create routine_ statement. Yugabyte recommends that you avoid using _routine_ in SQL statements because it's unrealistic to impose a programming rule _never_ to use _procedure_ or _function_ where _routine_ is allowed. 
Such resulting mixed use can only make searching more tricky.{{< /note >}} - -### Implementation languages - -YSQL supports two native implementation languages for user-defined subprograms: _[language sql](./language-sql-subprograms)_; and _[language plpgsql](./language-plpgsql-subprograms)_. It supports only one language for [anonymous blocks](./#anonymous-blocks): _language plpgsql_. - -### Functions - -A function produces a (possibly compound) value and is invoked by writing it as a term within a surrounding expression of arbitrary complexity—and this is the _only_ way to invoke a function. The degenerate case is that the function invocation is the entirety of the expression. You evaluate an expression, in SQL, by writing it as the argument of a bare _select_ or at one of the many syntax spots in a more complex SQL statement where it's legal to write a placeholder in a _[prepare](../the-sql-language/statements/perf_prepare/)_ statement. An expression is evaluated in PL/pgSQL source code just as it would be in other languages—as the argument of an explicit or implicit assignment. (Invoking a subprogram using an expression to provide the value for one of its arguments provides an example of implicit assignment.) - -A function is a syntactic peer of a variable in PL/pgSQL or a column in SQL. The overwhelmingly common convention is to name variables and columns with a noun or noun phrase. (It would be very odd to see a variable called _get_time_.) Stylists argue, therefore, that functions should also be named with a noun or noun phrase to denote the value that invoking the function produces. - -(Notwithstanding this, there are lots of SQL built-in functions with imperative names like _generate_series()_ or _gen_random_uuid()_ rather than, say, _generated_series()_ or _generated_random_uuid()_.) - -### Procedures - -The purpose of a procedure is to _do_ something. The syntax of the _[create [or replace] procedure](../the-sql-language/statements/ddl_create_procedure/)_ statement therefore does not allow specifying _returns_ in its declaration. A procedure can be invoked only as the argument of a _[call](../the-sql-language/statements/cmd_call/)_ statement—both in top-level SQL and in PL/pgSQL source code. - -Stylists argue, therefore, that procedures should be named with an imperative verb or verb phrase to denote the action that the invocation performs. - -A procedure _can_ have argument(s) whose mode is _inout_. Use this option if you want to pass back, say, a success/failure status to the caller. See the subsection [Example with 'inout' arguments](../the-sql-language/statements/cmd_call/#example-with-inout-arguments) in the _call_ statement account. - -### Procedures were first supported in PostgreSQL Version 11 - -PostgreSQL Version 10, and earlier versions, did not support procedures. Therefore, the critical distinction explained above was not supported: - -- A function is invoked as a term in an expression and names a computed value (and ideally has no side effects). -- A procedure _does_ something (i.e. its _raison d'être_ is to have side effects) and is invoked using the dedicated _call_ statement. - -Therefore, in PostgreSQL Version 10 and earlier, functions allowed formal arguments with the _out_ and _inout_ mode; and the _returns_ clause was optional. (It's also possible to create a function that has _returns void_. This has the same effect as a single data type were specified and _null_ were returned.) 
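For concreteness, here is a minimal sketch of the two styles. The schema _s_, the names, and the trivial bodies are invented just for this illustration; it shows the shape of the code, not a prescribed implementation.

```plpgsql
create schema if not exists s;

-- The pre-Version-11 workaround: a function with no "returns" clause whose
-- only purpose is its side effect, reporting status through an "out" argument.
create or replace function s.do_work_old_style(n in int, status out text)
  language plpgsql
as $body$
begin
  -- ...side-effecting work would go here...
  status := 'ok: '||n::text;
end;
$body$;

-- The Version-11-and-later style: a procedure, invoked with "call", that can
-- pass a status back through an "inout" argument.
create or replace procedure s.do_work(n in int, status inout text)
  language plpgsql
as $body$
begin
  -- ...side-effecting work would go here...
  status := 'ok: '||n::text;
end;
$body$;
```

The difference shows up at the call site: the function is invoked within an expression, as in _select s.do_work_old_style(42);_, while the procedure is invoked with _call s.do_work(42, null);_.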
PostgreSQL is duty-bound to allow application code that ran in an older version to work in the same way in a newer version. This means that even in the _current_ version of PostgreSQL, a function can _still_ be used where a procedure is the proper choice. - -{{< tip title="Respect the intended distinction between functions and procedures." >}} -Yugabyte recommends that you avoid using a function for the purpose for which a procedure is intended by regarding a function's _returns_ clause as mandatory and avoiding the use of _out_ and _inout_ arguments. This implies that, for a table function, you should prefer _returns table(...)_ over _returns setof_. The latter requires a list of _out_ arguments that correspond to the columns that, with the former, you list within the parentheses of _table(...)_. -{{< /tip >}} - -### Procedure invocation syntax - -{{%ebnf%}} - call_procedure, - actual_arg -{{%/ebnf%}} - -## Anonymous blocks - -You can also execute a so-called anonymous block. This is a procedure that's defined _only_ by its source code—in other words, it has no name and isn't persisted in the catalog. You simply execute it immediately using the _[do](../the-sql-language/statements/cmd_do/)_ SQL statement. An anonymous block differs from statements like _insert_, _update_, and _delete_ in that it cannot be the object of a _[prepare](../the-sql-language/statements/perf_prepare)_ statement. (However, any DML SQL statements that an anonymous block issues are implicitly prepared. And you take advantage of the preparation by repeatedly executing the same _do_ statement.) - -YSQL inherits, from PostgreSQL, the restriction that the implementation language for an anonymous block must be PL/pgSQL; and there are no plans for PostgreSQL to be enhanced to support other languages for anonymous blocks. The defining text of an anonymous block is governed by the grammar for the _[plpgsql_block_stmt](../syntax_resources/grammar_diagrams/#plpgsql-block-stmt)_—a particular kind of PL/pgSQL compound statement. Notice that _[plpgsql_block_stmt](../syntax_resources/grammar_diagrams/#plpgsql-block-stmt)_ denotes a rule in the [YSQL Grammar](../syntax_resources/grammar_diagrams/). When the context has established the intended meaning, the prose equivalent "block statement" will be used instead. - -Because an anonymous block cannot be the target of a _prepare_ statement, it cannot be parameterized. There are therefore very few use cases where an anonymous block is a more sensible choice than a procedure: typically, only when you will execute it no more than once, so that parameterization is not needed. Remember that a requirement not to create a persistent schema-object can be met by using a temporary user-defined procedure. See the section [Creating and using temporary schema-objects](../the-sql-language/creating-and-using-temporary-schema-objects/). - -## Why use user-defined subprograms? - -Some development shops avoid the use of user-defined subprograms altogether and use only top-level SQL statements as the client-side code's API to the database. And they manage to build fully-functional applications by sticking strictly to this paradigm. This implies, then, that the use of user-defined subprograms is _optional_—and this, in turn, implies the need to explain the benefits of using them. The explanation is well rehearsed both in the documentation and general marketing literature of suppliers of practical RDBMSs and in no end of third-party textbooks and blogs.
It rests on these main points: - -- The run-time SQL statements, _select_, _insert_, _update_, _delete_, and _commit_ implement primitive direct data manipulation operations. Often, one of these by itself is not enough to implement a particular business requirement. -- User-defined procedures encapsulate one or several primitive direct data manipulation operations to perform specified _atomic_ business transactions that typically make coordinated changes to the contents of several tables. -- User-defined functions encapsulate complex queries that typically access several tables rather like a view does but with the critical benefit that a function can be parameterized while a view cannot. Such complex queries implement specified business requirements. - -In other words, a pure SQL API implements primitive operations that are specified in terms of data; and an API defined by user-defined subprograms implements higher-level operations that are specified in terms of business purpose. Just like with any layered API scheme, the higher level of abstraction (here the business purpose level) can be implemented by several different variants of the lower level of abstraction scheme (here the data level) so that the design of the lower level scheme, whatever variant is chosen, is hidden behind the higher level scheme's API. This implies that the details of the lower level scheme can be changed (for example to improve performance) while the higher level API specification remains unchanged. The data level API therefore becomes an _internal_ API. - -Against this background, the benefits of using user-defined subprograms are clear: - -- A subprogram API to the database hides all the details of the implementation like the following from client-side code: the names of the tables and their columns; the names of schemas that house them; the names of the owners of these objects; the existence of indexes, constraints, and triggers; and the SQL statements that persist and retrieve the data. -- Client code will have no privileges on any of the artifacts that implement the internal data level API. Rather, it will have only the _execute_ privilege on the subprograms that implement the business purpose API. This implies that the subprograms will be created with _security definer_ mode—and not with _security invoker_ mode. (See the [section that describes the _security_ subprogram attribute](./subprogram-attributes/alterable-subprogram-attributes/#security).) -- Because the engineers who implement the database's subprogram API own all of the code for persisting and changing the data, they are uniquely empowered to take full responsibility for the data's correctness. Client code is empowered to perform _only_ the changes that the business specifies—and unspecified changes are therefore simply impossible. -- The subprogram encapsulation means, too, that every business transaction is done with just a single client-server round trip so that intermediate results that the client does not need do not have to be marshaled or transferred between server and client. This brings a performance benefit relative to an approach that invokes each of the low-level SQL statements that are needed to implement a business transaction in its own client-server call. (A sketch of such an encapsulating _security definer_ procedure follows this list.)
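The following minimal sketch illustrates this layering. The _app_ and _api_ schemas, the _orders_ and _order_lines_ tables, and the commented-out _client_role_ are hypothetical names invented for the illustration; it shows the shape of the approach under those assumptions, not a prescribed implementation.

```plpgsql
-- Hypothetical "internal" data level: tables that client code never touches directly.
create schema if not exists app;
create table if not exists app.orders(
  order_id  bigint generated always as identity primary key,
  customer  text not null);
create table if not exists app.order_lines(
  order_id  bigint not null references app.orders(order_id),
  line_no   int    not null,
  item      text   not null,
  primary key (order_id, line_no));

-- Hypothetical "business purpose" API: one procedure per business transaction.
create schema if not exists api;

create or replace procedure api.place_order(customer in text, items in text[])
  set search_path = pg_catalog, pg_temp
  security definer
  language plpgsql
as $body$
declare
  new_order_id bigint not null := 0;
begin
  -- One atomic business transaction: a master row together with its detail rows.
  insert into app.orders(customer) values(place_order.customer)
  returning order_id into new_order_id;

  insert into app.order_lines(order_id, line_no, item)
  select new_order_id, u.ord, u.item
  from unnest(items) with ordinality as u(item, ord);
end;
$body$;

revoke all on procedure api.place_order(text, text[]) from public;
-- grant execute on procedure api.place_order(text, text[]) to client_role;
```

A client that has been granted _execute_ on _api.place_order()_ can then record an order atomically with a single round trip, for example _call api.place_order('Alice', array['widget', 'gadget']);_, without holding any privilege on the underlying tables.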
- -Everybody who works with software will recognize that this is nothing other than the decades-old paradigm of exposing a module, in the overall context of modular software design, via an API that's defined by a set of purpose-oriented subprograms—and hiding all the implementation details behind that API. Here, the module is the database that implements the overall application's data persistence and retrieval requirements. (Other modules will implement other requirements like, for example, managing the end-user's graphic interface to the application's functionality.) - -## "Hard shell" case-study - -The approach to overall application design that hides everything about SQL statements and what these operate on from client-side code behind an API that's implemented as user-defined subprograms is sometimes referred to as the _"hard shell"_ approach. (The metaphor emphasizes the impenetrability of the procedural encapsulation.) A self-contained implementation of such a scheme is available for you to download, study, and run here: - -- **[ysql-case-studies/hard-shell](https://github.com/YugabyteDB-Samples/ysql-case-studies/tree/main/hard-shell)** - -This case-study is one among several. You can install all of them in a dedicated cluster that uses several databases. The overall framework implements a convention-based scheme that guarantees that the roles that own the objects that jointly implement a particular case-study can own objects only in the particular database that houses that study—and that there's no risk of collision between role names. Start with the _README_ for the overall multitenancy scheme here: - -- **[ysql-case-studies](https://github.com/YugabyteDB-Samples/ysql-case-studies/tree/main/)** - -It takes only minutes to download the code and then run the scripts that install it all and test it all. As a bonus, you can install and run the code without making any changes in a Vanilla [PostgreSQL](https://www.yugabyte.com/postgresql/) cluster to demonstrate [the full compatibility between PostgreSQL and YSQL](https://www.yugabyte.com/postgresql/postgresql-compatibility/). It has been tested using both Version 11 (upon which the YSQL implementation (prior to v2.25.0) is based) and the _current_ PostgreSQL version. - -## Creating, altering, and dropping subprograms - -These are the relevant SQL statements: - -- _[create [or replace] function](../the-sql-language/statements/ddl_create_function/)_ -- _[alter function](../the-sql-language/statements/ddl_alter_function/)_ -- _[create [or replace] procedure](../the-sql-language/statements/ddl_create_procedure/)_ -- _[alter procedure](../the-sql-language/statements/ddl_alter_procedure/)_ -- _[drop function](../the-sql-language/statements/ddl_drop_function/)_ -- _[drop procedure](../the-sql-language/statements/ddl_drop_procedure/)_ diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/_index.md deleted file mode 100644 index 9524b9785502..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/_index.md +++ /dev/null @@ -1,204 +0,0 @@ ---- -title: PL/pgSQL (a.k.a. "language plpgsql") subprograms [YSQL] -headerTitle: PL/pgSQL (a.k.a. "language plpgsql") subprograms -linkTitle: > - "language plpgsql" subprograms -description: Describes PL/pgSQL functions and procedures. 
These are also known as "language plpgsql" subprograms.) [YSQL]. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: language-plpgsql-subprograms - parent: user-defined-subprograms-and-anon-blocks - weight: 30 -type: indexpage -showRightNav: true ---- - -PL/pgSQL is a conventional, block-structured, imperative programming language designed to execute in the PostgreSQL server, and by extension in the YSQL server, for the specific purpose of executing SQL statements and dealing with the outcomes that they produce. It executes in the same process as SQL itself. And it uses the same underlying implementation primitives. This has these hugely valuable consequences: - -- The identical set of data types, with identical semantics, is available in both top-level SQL and in PL/pgSQL. -- Expression syntax and semantics are identical in both top-level SQL and in PL/pgSQL. -- All of the SQL built-in functions are available, with the same semantics, in PL/pgSQL. - -PL/pgSQL's basic syntax conventions and repertoire of simple and compound statements seem to be inspired by Ada. Here are some examples: - -- _a := b + c;_ -- _return a + d_; -- _declare... begin... exception... end;_ -- _if... then... elsif... then... else... end if;_ -- _case... when... then... else... end case;_ - -However, PL/pgSQL lacks very many of Ada's features. Here are a couple of notable missing Ada features: - -- packages -- the ability to define functions and procedures within _declare_ sections - -On the other hand, PL/pgSQL extends Ada with a wealth of language features that target its specific use for implementing user-defined subprograms that are stored in, and that execute within, a RDBMS. Here are some examples: - -```output -if some_boolean then - insert into s.t(v) values(some_local_variable); -end if; -``` - -and: - -```output -foreach val in array values_array loop - insert into s.t(v) values(val) returning k into new_k; - new_ks_array := new_ks_array||new_k; -end loop; -``` - -You choose PL/pgSQL as the implementation language for a user-defined subprogram by including _language plpgsql_ in the subprogram's header. Its Ada-like features make _language plpgsql_ subprograms very much more expressive and generally useful than _language sql_ subprograms. See the [example that shows how to insert a master row together with its details rows](../language-sql-subprograms/#insert-master-and-details) in the section [SQL (a.k.a. "language sql") subprograms](../language-sql-subprograms/). A _language sql_ procedure cannot meet the requirement because it has no mechanism that allows the autogenerated new _masters_ row's primary key value to be used as the new _details_ rows' foreign key value. The _language plpgsql_ procedure manages the task trivially because you can populate a local variable when you _insert_ into the _masters_ table thus: - -```output -insert into s.masters(mv) values(new_mv) returning mk into new_mk; -``` - -And then you can reference the local variable in the next _insert_ statement, into the _details_ table, thus: - -```output -insert into s.details(mk, dv) -select new_mk, u.v -from unnest(dvs) as u(v); -``` - -Here's another example procedure that shows various ways to return the result set from a _select_ statement—either directly or by using a loop that allows you to intervene with arbitrary processing. 
Notice too the difference between so-called _static SQL_ where you write the statement as direct embedded constructs in PL/pgSQL that you fix when at subprogram creation time or as a _text_ value that you can assemble and submit at run time. - -```plpgsql -create schema s; -create table s.t(k serial primary key, v int not null); -insert into s.t(v) select g.v from generate_series(11, 100, 11) as g(v); - -create function s.f(v_min in int, mode in text = 'static qry') - returns table(val int) - set search_path = pg_catalog, pg_temp - security definer - language plpgsql -as $body$ -begin - case mode - when 'static qry' then - return query select v from s.t where v > v_min order by v; - - when 'static loop' then - declare - x s.t.v%type not null := 0; - begin - for x in (select v from s.t where v > v_min order by v) loop - val := x + 1; - return next; - end loop; - end; - - when 'dynamic qry' then - return query execute format('select v from s.%I where v > $1 order by v', 't') using v_min; - - when 'dynamic loop' then - declare - x s.t.v%type not null := 0; - begin - for x in execute format('select v from s.%I where v > $1 order by v', 't') using v_min loop - val := - case - when x < 85 then x + 3 - else x + 7 - end; - return next; - end loop; - end; - end case; -end; -$body$; -``` - -Test the _static_ and the _dynamic_ _qry_ variants first. Take advantage of the default value for the _mode_ formal argument just for the demonstration effect: - -```plpgsql -select s.f(v_min=>40); -select s.f(v_min=>40, mode=>'dynamic qry'); -``` - -Each produces the same result, thus: - -```output - 44 - 55 - 66 - 77 - 88 - 99 -``` - -Next test the _static_ _loop_ variant: - -```plpgsql -select s.f(v_min=>40, mode=>'static loop'); -``` - -It produces this result: - -```output - 45 - 56 - 67 - 78 - 89 - 100 -``` - -Finally, test the _dynamic_ _loop_ variant: - -```plpgsql -select s.f(v_min=>40, mode=>'dynamic loop'); -``` - -It produces this result: - -```output - 47 - 58 - 69 - 80 - 95 - 106 -``` - -{{< tip title="Don't use PL/pgSQL to do procedurally what SQL can do declaratively." >}} -The purpose of the code shown above is to illustrate the syntax and semantics of some useful PL/pgSQL constructs. However, you should not use procedural code, in a loop, to achieve what SQL can achieve declaratively. 
The effect of the _dynamic loop_ variant is better expressed thus: - -```plpgsql -create function s.f2(v_min in int) - returns table(val int) - set search_path = pg_catalog, pg_temp - security definer - language plpgsql -as $body$ -begin - return query execute format(' - select - case - when v < 85 then v + 3 - else v + 7 - end - from s.%I - where v > $1 - order by v', - 't') using v_min; -end; -$body$; -``` - -This: - -```plpgsql -select s.f2(v_min=>40); -``` - -produces the same result as does this: - -```plpgsql -select s.f(v_min=>40, mode=>'dynamic loop'); -``` -{{< /tip >}} \ No newline at end of file diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/_index.md deleted file mode 100644 index fb6bf0678819..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/_index.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: PL/pgSQL syntax and semantics [YSQL] -headerTitle: PL/pgSQL syntax and semantics -linkTitle: > - "language plpgsql" syntax and semantics -description: Describes the syntax and semantics of the PL/pgSQL language (a.k.a. language plpgsql). [YSQL]. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: plpgsql-syntax-and-semantics - parent: language-plpgsql-subprograms - weight: 20 -type: indexpage -showRightNav: true ---- - -PostgreSQL, and therefore YSQL, natively support both _language sql_ and _language plpgsql_ functions and procedures. But the implementation of a _do_ statement can only be _language plpgsql_. PL/pgSQL source text is governed by the [_plpgsql_block_stmt_ rule](#plpgsql-block-stmt). See these sections: - -- _[create function](../../../the-sql-language/statements/ddl_create_function/)_ -- _[create procedure](../../../the-sql-language/statements/ddl_create_procedure/)_ -- _[do](../../../the-sql-language/statements/cmd_do/)_ - -The syntax diagrams in these three sections show that the PL/pgSQL source text must be enquoted. Yugabyte recommends that, for consistency, you use dollar quoting around the source text and that you spell this as _$body$_. Notice that PL/pgSQL's dynamic SQL feature lets you write a user-defined procedure that will create a user-defined subprogram. If you take advantage of this, then you'll have to use different enquoting syntax around the source text of the to-be-created subprogram. - -This section, and its subsections, specify: - -- the grammar of the _plpgsql_block_stmt_ rule -- its decomposition down to terminal rules -- the associated semantics. - -{{%ebnf%}} - plpgsql_block_stmt, - plpgsql_declaration_section, - plpgsql_executable_section, - plpgsql_exception_section -{{%/ebnf%}} - -## The minimal PL/pgSQL source text - -The executable section can include a block statement—and this implies the possibility of an arbitrarily deep nesting. It's this that underpins this characterization of PL/pgSQL at the start of this overall section on [_language plpgsql_ subprograms](../../language-plpgsql-subprograms/): - -> PL/pgSQL is a conventional, block-structured, imperative programming language [whose] basic syntax conventions and repertoire of simple and compound statements seem to be inspired by Ada. - -The executable section is mandatory. 
This, therefore, is the minimal form of a PL/pgSQL source text: - -```output -$body$ -begin - -end; -$body$; -``` - -It's useful to know this because each of _create function_ and _create procedure_, when it completes without error, inevitably creates a subprogram upon which the _execute_ privilege has already been granted to _public_. See these tips in the sections that describe these two _create_ statements: - -- ['Create function' grants 'execute' to 'public'](../../../the-sql-language/statements/ddl_create_function/#create-function-grants-execute-to-public) - -- ['Create procedure' grants 'execute' to 'public'](../../../the-sql-language/statements/ddl_create_procedure/#create-procedure-grants-execute-to-public) - -Each tip recommends that you always revoke this privilege immediately after creating a subprogram. However, even this might expose a momentary security risk. Here is the watertight secure approach: - -```plpgsql -create schema s; - -create procedure s.p() - language plpgsql -as $body$ -begin - null; -- Implementation to follow. -end; -$body$; - -revoke execute on procedure s.p() from public; - --- "create or replace" leaves the extant privileges on "s.p" unchanged. -create or replace procedure s.p() - set search_path = pg_catalog, pg_temp - security definer - language plpgsql -as $body$ -declare - -- (Optionally) the intended declarations. - -- ... - -begin - -- The intended implementation. - -- ... -exception - -- (Optionally) the intended handlers. - -- ... -end; -$body$; -``` - -Notice that _null;_ is a legal PL/pgSQL executable statement. Of course, it does nothing at all. You might prefer to write _null;_ explicitly to emphasize your intention. Now you can grant _execute_ on _s.p_ to the role(s) that you intend. - -Each section is described in a dedicated subsection: - -- **[declaration section](./declaration-section)** - -- **[executable section](./executable-section)** - -- **[exception section](./exception-section)** diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/_index.md deleted file mode 100644 index af766dac33ab..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/_index.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: PL/pgSQL executable section [YSQL] -headerTitle: The PL/pgSQL executable section -linkTitle: Executable section -description: Describes the syntax and semantics of the PL/pgSQL executable section. [YSQL]. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: executable-section - parent: plpgsql-syntax-and-semantics - weight: 20 -type: indexpage -showRightNav: true ---- - -## Syntax - -Here is the decomposition of the _plpgsql_stmt_ grammar rule, down to its terminal rules. - -{{%ebnf%}} - plpgsql_executable_stmt, - plpgsql_basic_stmt, - plpgsql_compound_stmt -{{%/ebnf%}} - -See the dedicated sections **[Basic PL/pgSQL executable statements](./basic-statements/)** and **[Compound PL/pgSQL executable statements](./compound-statements/)**. - -## Semantics - -You can use names, in the _executable section_, only if the name can be resolved. 
If you don't use a qualified identifier, then the name resolution is attempted within the names that the _declaration section_ of the most tightly enclosing block establishes—with the caveat that a matching name must be for an item of the kind that syntactic analysis of the to-be-resolved name has established. If name resolution fails in that most tightly enclosing scope, then it's re-attempted in the next most tightly enclosing scope—finishing (when the outermost block statement is the implementation of a subprogram and not that of a _do_ statement) with the subprogram's list of formal arguments. If a to-be-resolved name remains unresolved after failing in all these scopes, then resolution is attempted in schema scope. This account applies, too, for how the names of block statements are resolved—but, of course, these names must find resolution within the contained scopes of the outermost block statements (before, if resolution finds no match, then escaping to schema scope). - -Consider this contrived example. (It relies on the accounts of the [declaration section](../declaration-section) and the [exception section](../exception-section)). Here, no names escape to schema scope. - -```plpgsql -create function f(x in text) - returns table(z text) - set search_path = pg_catalog, pg_temp - security invoker - language plpgsql -as $body$ -<>declare - s constant text not null := rpad(' ', 10); - a constant text not null := 'b0.a'; -begin - z := ''; return next; - z := 'in <>'; return next; - z := s||'x: '||x; return next; - z := s||'a: '||a; return next; - - <>declare - x constant text not null := 'b1.x'; - begin - z := ''; return next; - z := 'in <>'; return next; - z := s||'x: '||x; return next; - z := s||'a: '||a; return next; - - <>declare - x constant text not null := 'b2.x'; - a constant text not null := 'b2.a'; - begin - z := ''; return next; - z := 'in <>'; return next; - z := s||'x: '||x; return next; - z := s||'f.x: '||f.x; return next; - end b2; - end b1; - - <>declare - a constant text not null := 'b3.a'; - begin - z := ''; return next; - z := 'in <>'; return next; - z := s||'x: '||x; return next; - if length(x) > 3 then - raise plpgsql_error using message := 'bad "x" in block <>'; - end if; - exception - when plpgsql_error then - <>declare - msg text not null := ''; - begin - get stacked diagnostics msg := message_text; - - z := '-----'; return next; - z := '"plpgsql_error" handled for x = '||x; return next; - z := 'Message: '||msg; return next; - end except; - end b3; -end b0; -$body$; -``` - -The only goal of this code is pedagogy. Notice these semantic features: - -- Blocks are nested. For example, _b2_ is declared within the executable section of _b1_; and _b1_ is declared within the executable section of _b0_; - -- All the local variables are decorated with the keywords _not null_ and _constant_ and are given initial values, as they must be, as part of the declaration. - -- The names of the local variables, _a_, and _x_ collide with names that are defined in outer scopes. - -- The block statements are all labeled to allow the use of block-qualified identifiers. - -- An unqualified reference to an item (formal argument or local variable) resolves to whatever the name means in the most-tightly-enclosing scope. An item in an inner scope can therefore hide an item with the same name in an outer scope. - -- You can safely write _raise plpgsql_error_, and then write an exception section to handle it, knowing that only your code might raise this. 
- - -Test it first like this: - -```plpgsql -select f('f.x'); -``` - -This is the output: - -```output - in <> - x: f.x - a: b0.a - - in <> - x: b1.x - a: b0.a - - in <> - x: b2.x - f.x: f.x - - in <> - x: f.x -``` - -Now test it like this: - -```plpgsql -select f('f.bad'); -``` - -This is the output: - -```output - in <> - x: f.bad - a: b0.a - - in <> - x: b1.x - a: b0.a - - in <> - x: b2.x - f.x: f.bad - - in <> - x: f.bad - ----- - "plpgsql_error" handled for x = f.bad - Message: bad "x" in block <> -``` diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/_index.md deleted file mode 100644 index 4e4778dbe0c8..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/basic-statements/_index.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: Basic PL/pgSQL executable statements [YSQL] -headerTitle: Basic PL/pgSQL executable statements -linkTitle: Basic statements -description: Describes the syntax and semantics of the basic PL/pgSQL executable statements. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: basic-statements - parent: executable-section - weight: 10 -type: indexpage -showRightNav: true ---- - -The following table lists all of the [basic PL/pgSQL executable statements](../../../../../syntax_resources/grammar_diagrams/#plpgsql-basic-stmt). -- The _Statement Name_ column links to the page where the semantics are described. -- The _Syntax rule name_ column links to the definition on the omnibus [Grammar Diagrams](../../../../../syntax_resources/grammar_diagrams/) reference page. - -| STATEMENT NAME | SYNTAX RULE NAME | COMMENT | -| -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- | -| ["assert" statement](./assert/) | [plpgsql_assert_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-assert-stmt) | abort the current server call if the assertion tests "false" | -| "assign" statement | [plpgsql_assignment_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-assignment-stmt) | e.g. "a := b + c;" and "v := (select count(*) from s.t)". No further explanation is needed. 
| -| [Bare SQL statement](./doing-sql-from-plpgsql/#the-bare-sql-statement) | [plpgsql_static_bare_sql_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-static-bare-sql-stmt) | ANY embedded SQL statement (including DDL, etc) that doesn't return values | -| ["close" statement](./cursor-manipulation/#plpgsql-close-cursor-stmt) | [plpgsql_close_cursor_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-close-cursor-stmt) | close a cursor (using "refcursor" variable) | -| ["continue" statement](../compound-statements/loop-exit-continue/#continue-statement) | [plpgsql_continue_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-continue-stmt) | start the next iteration of a loop (or enclosing loop) | -| ["execute" statement](./doing-sql-from-plpgsql/#the-execute-statement) | [plpgsql_dynamic_sql_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-dynamic-sql-stmt) | execute a dynamic SQL statement, optionally with "into" clause for returned values | -| ["exit" statement](../compound-statements/loop-exit-continue/#exit-statement) | [plpgsql_exit_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-exit-stmt) | exit from a loop (or enclosing loop) | -| ["fetch" statement](../compound-statements/loop-exit-continue/infinite-and-while-loops/#infinite-loop-over-cursor-results) | [plpgsql_fetch_from_cursor_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-fetch-from-cursor-stmt) | fetch from a cursor (using "refcursor" variable) | -| ["get diagnostics" statement](./get-diagnostics/) | [plpgsql_get_diagnostics_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-get-diagnostics-stmt) | get diagnostic info about the most-recently-executed SQL statement | -| ["get stacked diagnostics" statement](../../exception-section/#how-to-get-information-about-the-error) | [plpgsql_get_stacked_diagnostics_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-get-stacked-diagnostics-stmt) | get diagnostic info about the exception that brought the point of execution to the present handler | -| ["insert, update, delete into" statement](./doing-sql-from-plpgsql/#the-insert-update-delete-into-statement) | [plpgsql_static_dml_returning_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-static-dml-returning-stmt) | embedded "insert", "update", or "delete" statement with "into" clause for returned values | -| "move" statement | [plpgsql_move_in_cursor_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-move-in-cursor-stmt) | move in cursor — not yet supported, see [Beware Issue #6514](../../../../../cursors/#beware-issue-6514) | -| ["open" statement](./cursor-manipulation/#plpgsql-open-cursor-stmt) | [plpgsql_open_cursor_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-open-cursor-stmt) | open a cursor (using "refcursor" variable) | -| ["perform" statement](./doing-sql-from-plpgsql/#the-perform-statement) | [plpgsql_perform_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-perform-stmt) | execute a "select" statement without returning rows | -| ["raise" statement](./raise/) | [plpgsql_raise_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-raise-stmt) | "raise info" or... "warning" or... 
"exception" | -| ["return" statement](./return-statement/#semantics) | [plpgsql_return_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-return-stmt) | exit from subprogram to caller, optionally returning value(s) | -| ["select into" statement](./doing-sql-from-plpgsql/#the-select-into-statement) | [plpgsql_static_select_into_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-static-select-into-stmt) | embedded single-row "select" with "into" clause for returned values | diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/_index.md deleted file mode 100644 index 10bd8068e80a..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/_index.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Compound PL/pgSQL executable statements [YSQL] -headerTitle: Compound PL/pgSQL executable statements -linkTitle: Compound statements -description: Describes the syntax and semantics of the compound PL/pgSQL executable statements. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: compound-statements - parent: executable-section - weight: 20 -type: indexpage -showRightNav: true ---- - -The following table lists all of the [compound PL/pgSQL executable statements](../../../../../syntax_resources/grammar_diagrams/#plpgsql-compound-stmt). -- The _Statement Name_ column links to the page where the semantics are described. -- The _Syntax rule name_ column links to the definition on the omnibus [Grammar Diagrams](../../../../../syntax_resources/grammar_diagrams/) reference page. - -| STATEMENT NAME | SYNTAX RULE NAME | COMMENT | -| --------------------------------------------- | ------------------------------------------------------------------------------------------ | ------- | -| [block statement](../../#plpgsql-block-stmt) | [plpgsql_block_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-block-stmt) | **declare** _\_
**begin** _\<statements\>_ <br/> **exception** _\<handlers\>_ <br/> **end;** |
| [case statement](./case-statement/) | [plpgsql_case_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-case-stmt) | _Simple form:_ <br/> **case** _\<expression\>_ <br/> **when** _\<value\>_ **then** _\<statements\>_ ... <br/> **else** _\<statements\>_ <br/> **end case;** <br/><br/> _Searched form:_ <br/> **case** <br/> **when** _\<boolean expression\>_ **then** _\<statements\>_ ... <br/> **else** _\<statements\>_ <br/> **end case;** |
| [if statement](./if-statement/) | [plpgsql_if_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-if-stmt) | **if** _\<boolean expression\>_ **then** _\<statements\>_ <br/> **elsif** _\<boolean expression\>_ **then** _\<statements\>_ ... <br/> **else** _\<statements\>_ <br/>
**end if;** | -| [loop statement](./loop-exit-continue/) | [plpgsql_loop_stmt](../../../../../syntax_resources/grammar_diagrams/#plpgsql-loop-stmt) | [unbounded loop](./loop-exit-continue/infinite-and-while-loops/) _or_ [bounded loop](./loop-exit-continue/#bounded-loop) | diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/_index.md deleted file mode 100644 index 261bd0be2a7b..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/language-plpgsql-subprograms/plpgsql-syntax-and-semantics/executable-section/compound-statements/loop-exit-continue/_index.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: Loop, exit, and continue statements [YSQL] -headerTitle: Loop, exit, and continue statements -linkTitle: - The "loop", "exit", and "continue" statements -description: Describes the syntax and semantics of Loop, exit, and continue statements. [YSQL] -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: loop-exit-continue - parent: compound-statements - weight: 40 -type: indexpage -showRightNav: true ---- - -## Syntax - -### The "loop" statement - -{{%ebnf%}} - plpgsql_loop_stmt, - plpgsql_unbounded_loop_defn, - plpgsql_bounded_loop_defn, - plpgsql_integer_for_loop_defn, - plpgsql_array_foreach_loop_defn, - plpgsql_query_for_loop_defn, - plpgsql_dynamic_subquery -{{%/ebnf%}} - -### The "exit" and "continue" statements - -{{%ebnf%}} - plpgsql_exit_stmt, - plpgsql_continue_stmt -{{%/ebnf%}} - -## Semantics - -There are two kinds of PL/pgSQL loop: the _unbounded loop_; and the (bounded) _for loop_. - -- The number of iterations that an _unbounded loop_ performs isn't given by a simple recipe. Rather, iteration continues until it is interrupted, at any statement within its statement list, by invoking _exit_. -- In contrast, the (maximum) number of iterations that a _for loop_ performs is determined, before the first iteration starts. The recipe might, for example, be just the consecutive integers between a lower bound and an upper bound. Or it might be "for every element in the specified array", or "for every row in the result set of a specified query. - -The functionality of all kinds of loops is complemented by the _exit_ statement and the _continue_ statement. The _exit_ statement aborts the iteration altogether. And the _continue_ statement aborts just the current iteration and then starts the next one. - -See the section [Two case studies: Using various kinds of "loop" statement, the "exit" statement, and the "continue" statement](../loop-exit-continue/two-case-studies/) for realistic uses of all of the statements (except for the _while loop_) that this page describes. - -### Unbounded loop - -The name _unbounded_ denotes the fact that the number of iterations that the loop will complete is not announced at the start. Rather, iteration ends when facts that emerge while it executes determine that it's time to stop iterating. There are two kinds of _unbounded loop_: the _infinite loop_; and the _while loop_. See the dedicated page [The "infinite loop" and the "while loop"](./infinite-and-while-loops/). 
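Here, for illustration only, is a minimal _do_ block that contrasts the two kinds of unbounded loop and shows _exit_ and _continue_ in position. The variable and the stopping conditions are invented just for this sketch.

```plpgsql
do $body$
declare
  remaining int not null := 5;
begin
  -- Infinite loop: iteration ends only when "exit" is invoked.
  loop
    remaining := remaining - 1;
    exit when remaining < 1;
  end loop;

  -- While loop: the condition is tested before each iteration starts.
  remaining := 5;
  while remaining > 0 loop
    remaining := remaining - 1;
    -- "continue" abandons the rest of the current iteration (illustrative only
    -- here, because no statements follow it).
    continue when remaining = 3;
  end loop;

  raise info 'both loops finished with remaining = %', remaining;
end;
$body$;
```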
- -### "Exit" statement - -Most usually, the _exit_ statement is written within a _loop_ statement—but see the note immediately below. - -The _exit_ statement aborts the execution of the loop. Notice the optional _label_. It must match an _end loop_ statement (or the _end_ statement of a block statement) with the same _label_ within the current top-level block statement. - -- When the _label_ is omitted, the point of execution moves to the statement that immediately follows the present loop's _end loop_ statement. -- When _exit some_label_ is used, the point of execution moves to the statement that immediately follows the _end loop_ statement (or the bare _end_ statement of the block statement) that has the same label. - -{{< note title="An 'exit' statement's 'label' must match that of an 'end loop' statement or that of the 'end' statement of a block statement." >}} -See the dedicated section [Using the "exit" statement to jump out of a block statement](./exit-from-block-statememt/). -{{< /note >}} - -### "Continue" statement - -The _continue_ statement is legal only within a _loop_ statement. - -- When _label_ is omitted, the _continue_ statement causes the current iteration to be abandoned and the next one to start immediately. - -- When _label_ is specified, it causes the current iteration of the most tightly enclosing loop, and any loops in which it is nested through the one whose _end loop_ matches the _label_, to be abandoned. Then the next iteration of the loop whose _end loop_ matches the _label_ starts immediately. - -- If used, the _label_ must match that of an _end loop_ statement. - -It's possible to write the _exit_ or the _continue_ statement in one of the legs of an _if_ statement or a _case_ statement in the executable section, or even in the exception section, of a block statement at any nesting depth. Probably, in such a context, you'd omit the optional _when_ clause. - -### Bounded loop - -The name _bounded_ denotes the fact that the (maximum) number of iterations that the loop will complete is computed, just before the first iteration, on entry into the loop. The qualifier "maximum" is used because the _exit_ statement can be used to cause premature exit. - -There are three kinds of _bounded loop_ loop: - -- the _[integer for loop](./integer-for-loop/)_ — defined by the _plpgsql_integer_for_loop_defn_ syntax rule -- the _[array foreach loop](./array-foreach-loop/)_ — defined by the _plpgsql_array_foreach_loop_defn_ syntax rule -- The _[query for loop](./query-for-loop/)_ — defined by the _plpgsql_query_for_loop_defn_ syntax rule. diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/_index.md deleted file mode 100644 index b639c1942867..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/_index.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: Subprogram attributes [YSQL] -headerTitle: Subprogram attributes -linkTitle: Subprogram attributes -description: Describes and categorizes the various attributes that characterize user-defined functions and procedures [YSQL]. 
-image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: subprogram-attributes - parent: user-defined-subprograms-and-anon-blocks - weight: 10 -aliases: -type: indexpage -showRightNav: true ---- - -The overall behavior of a user-defined function or procedure is determined by a set of characteristics that are defined with the _create [or replace]_ and _alter_ statements for each subprogram kind. - -- The following characteristics are singletons in the categorization scheme in that they may be set _only_ with _create [or replace]_ and the rules that specify how they are set are not spelled with "attribute": - - - The argument list (i.e. the name, the mode, the data type, and optionally a default expression for each argument). See the [arg_decl_with_dflt](../../../ysql/syntax_resources/grammar_diagrams/#arg-decl-with-dflt) rule. - - - And, just for a function, the return data type. - -- All of the other determining characteristics are known by the term of art _attribute_. This term is used in the names of rules in the [YSQL Grammar](../../syntax_resources/grammar_diagrams/) for distinct subcategories of attribute, grouped according to _when_ they may be set (only with _create [or replace]_, only with _alter_, or with both) and to _which kind_ of subprogram they apply (only to functions, or to both functions and procedures). - -You can see the names of all of these rules in the grammars for _create [or replace] function_, _create [or replace] procedure_, _alter function_, and _alter procedure_, below: - -{{%ebnf localrefs="alterable_fn_only_attribute,alterable_fn_and_proc_attribute,special_fn_and_proc_attribute,unalterable_fn_attribute,unalterable_proc_attribute" %}} - create_function, - create_procedure, - alter_function, - alter_procedure -{{%/ebnf%}} - -Here are the different attribute rules. - -## Unalterable subprogram attributes - -The _unalterable subprogram attributes_ can be set _only_ with the _create [or replace]_ statement. Each of _function_ and _procedure_ has its own _unalterable attributes_ rule. They share _language_ and _subprogram_implementation_. But the status _regular function_ or _window function_ is meaningless for a procedure. - -{{%ebnf%}} - unalterable_fn_attribute, - unalterable_proc_attribute -{{%/ebnf%}} - -{{< note title="This major section, so far, describes only user-defined subprograms and anonymous blocks that are implemented in SQL or PL/pgSQL." >}} -Further, it does not yet describe how to create user-defined window functions. -{{< /note >}} - - -## Special subprogram attributes - -The special subprogram attributes are set using a general syntax style with the _alter_ statements. - -{{%ebnf%}} - special_fn_and_proc_attribute -{{%/ebnf%}} - -The syntax diagram shows that if you want to change any of these attributes, then you must change them one at a time by issuing _alter_ repeatedly. - -The _schema_ and the _name_ of a subprogram are set using dedicated explicit syntax with _create [or replace]_. But the _owner_ cannot be explicitly set with _create [or replace]_; rather, a new subprogram's _owner_ is implicitly set to what the _[current_role](https://www.postgresql.org/docs/15/functions-info.html#FUNCTIONS-INFO-SESSION-TABLE)_ built-in function returns. 
(This will be what the _[session_user](https://www.postgresql.org/docs/15/functions-info.html#FUNCTIONS-INFO-SESSION-TABLE)_ built-in function returns if _create [or replace]_ is issued as a top-level SQL statement; and it will be the _owner_ of a _security definer_ subprogram that issues the SQL statement.) - -As it happens, and just for PostgreSQL-historical reasons, if you want to specify the _extension_ on which a new subprogram depends you can do this only by _first_ creating the subprogram and _then_ specifying the name of the _extension_ using the subprogram-specific _alter_ statement. - -See the section [The semantics of the "depends on extension" subprogram attribute](depends-on-extension-semantics/) for more information about this attribute. - -## Alterable subprogram attributes - -These attributes are common for both functions and procedures: - -{{%ebnf%}} - alterable_fn_and_proc_attribute -{{%/ebnf%}} - -See the subsection [Alterable subprogram attributes](./alterable-subprogram-attributes/) for the explanations of the _configuration parameter_ and _security_ attributes. - -## Alterable function-only attributes - -Notice that there are no procedure-specific alterable attributes. These attributes are specific to just functions: - -{{%ebnf%}} - alterable_fn_only_attribute -{{%/ebnf%}} - -See the subsection [Alterable function-only attributes](./alterable-function-only-attributes/) for the explanations of the _volatility_, _On NULL input_, _parallel_, _leakproof_, _cost_ and _rows_ attributes. diff --git a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/alterable-function-only-attributes/_index.md b/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/alterable-function-only-attributes/_index.md deleted file mode 100644 index 37e22f2c075e..000000000000 --- a/docs/content/preview/api/ysql/user-defined-subprograms-and-anon-blocks/subprogram-attributes/alterable-function-only-attributes/_index.md +++ /dev/null @@ -1,384 +0,0 @@ ---- -title: Alterable function-only attributes [YSQL] -headerTitle: Alterable function-only attributes -linkTitle: Alterable function-only attributes -description: Describes and categorizes the various attributes that characterize just user-defined functions [YSQL]. -image: /images/section_icons/api/subsection.png -menu: - preview_api: - identifier: alterable-function-only-attributes - parent: subprogram-attributes - weight: 30 -aliases: -type: indexpage -showRightNav: true ---- - -## Volatility - -The _[volatility](../../../syntax_resources/grammar_diagrams/#volatility)_ attribute has these allowed values: - -- _volatile_ -- _stable_ -- _immutable_ - -The default is _volatile_. - -This attribute allows the function's author to state a promise about the timespan over which a given (set of) actual arguments uniquely determines the function's return value. According to what is promised, PostgreSQL (and therefore YSQL) is allowed to cache some number of _"return-value-for-actual-arguments"_ pairs—in pursuit of improving performance. If caching is done, the scope is a single session and the duration is limited to the session's lifetime. - -Notice that _"is allowed to cache"_ does not mean _"will cache"_. The mere possibility is critical to the definition of the semantics of volatility. The conditions that make caching more, or less, likely are a separable concern. 
-
-Tautologically, PostgreSQL (and therefore YSQL) is unable to detect if the promise is good—so an author who gives a false promise is asking for wrong results.
-
-{{< tip title="See the PostgreSQL documentation for more detail." >}}
-The section [38.7. Function Volatility Categories](https://www.postgresql.org/docs/15/xfunc-volatility.html) explains some of the more subtle aspects of function _volatility_. In particular, it makes this recommendation:
-
-> For best optimization results, you should label your functions with the strictest volatility category that is valid for them.
-
-The section [43.11.2. Plan Caching](https://www.postgresql.org/docs/15/plpgsql-implementation.html#PLPGSQL-PLAN-CACHING) explains the caching mechanism and how it is tied to the notion of _prepare_ and the possibility that a prepared statement might (or might not) cache its execution plan.
-{{< /tip >}}
-
-### volatile
-
-This denotes no promise at all. Here's a compelling demonstration of a function where _volatile_ is the only possible honest choice:
-
-```plpgsql
-drop function if exists volatile_result() cascade;
-
-create function volatile_result()
-  returns text
-  volatile
-  language sql
-as $body$
-  select gen_random_uuid()::text;
-$body$;
-
-select volatile_result() as v1, volatile_result() as v2;
-```
-
-This is a typical result:
-
-```output
-                  v1                  |                  v2
---------------------------------------+--------------------------------------
- 0869edfa-af63-4308-8b80-d9d5a58491b8 | 55589cf4-5441-4582-9cf1-688481b37f55
-```
-
-The function _volatile_result()_ returns different results upon successive evaluations even during the execution of a single SQL statement.
-
-### stable
-
-This denotes a promise that holds good just for the duration of a SQL statement's execution. The human analyst can readily see that the following function can honestly be marked as _stable_:
-
-```plpgsql
-drop function if exists stable_result(text) cascade;
-
-create function stable_result(which in text)
-  returns text
-  stable
-  language sql
-as $body$
-  select
-    case
-      when which = 'a' then current_setting('x.a')
-      when which = 'b' then current_setting('x.b')
-    end;
-$body$;
-```
-
-The scope of a user-defined session parameter like _"x.a"_ is just the single session that sets it. And, in the example, the human can readily see that the code of _stable_result()_ doesn't set _"x.a"_ or _"x.b"_—and that no other route exists to setting them from elsewhere while _stable_result()_ is executing. Test it like this:
-
-```plpgsql
-set x.a = 'dog';
-set x.b = 'cat';
-
-select stable_result('a'), stable_result('b');
-```
-
-Clearly, the return values from _stable_result()_ can be different when it's invoked in a new SQL statement, thus:
-
-```plpgsql
-set x.a = 'frog';
-set x.b = 'bird';
-
-select stable_result('a'), stable_result('b');
-```
-
-### immutable
-
-When you mark a function as _immutable_, you give permission for PostgreSQL (and therefore YSQL) to build a session-duration cache where the key to a cache-entry is the vector of actual arguments with which the function is invoked and the key's payload is the function's return value.
-
-(The caching mechanism is the prepared statement and the possibility to cache an execution plan with it. But you needn't understand the mechanism in order to understand the semantic proposition.)
-
-Further, if you want to create an expression-based index that references a user-defined function, then it _must_ be marked _immutable_.
Without this volatility setting, you get this error: - -```output -42P17: functions in index expression must be marked IMMUTABLE -``` - -Marking a function as _immutable_ expresses a promise that must hold good for the lifetime of the function's existence (in other words, from the moment it's created to the moment that it's dropped) thus: - -- The function has no side effects. -- The function is mathematically deterministic—that is, the vector of actual arguments uniquely determines the function's return value. - -Nothing prevents you from lying. But doing so will, sooner or later, bring wrong results. - -See the section [Immutable function examples](immutable-function-examples/). - -## On_null_input - -The _[on_null_input](../../../syntax_resources/grammar_diagrams/#on-null-input)_ attribute has these allowed values: - -- _called on null input_ -- _strict_ -- _returns null on null input_ - -The default is _called on null input_. Notice that _strict_ is simply a synonym for _returns null on null input_. - -### called on null input - -This allows the function to be executed just as its source code specifies when at least one of its actual arguments is _null_. Function authors must then take the responsibility for handling the case that any actual is _null_ appropriately. - -### strict - -This instructs YSQL simply to skip executing the function's source code when at least one of its actual arguments is _null_ and simply to return _null_ immediately. - -Try this: - -```plpgsql -\pset null '' -deallocate all; -drop function if exists f(text, int, boolean) cascade; - -create function f(t in text, i in int, b in boolean) - returns text - called on null input - language plpgsql -as $body$ -declare - status text not null := '???'; -begin - if t is null then - status := 'Bad: t is null'; - - elsif i is null then - status := 'Bad: i is null'; - - elsif b is null then - status := 'Bad: b is null'; - - else - status := 'OK.'; - end if; - - return status; -end; -$body$; - -prepare q as -select - (select f('dog', 42, true)) as test_0, - (select f(null, 42, true)) as test_1, - (select f('dog', null, true)) as test_2, - (select f('dog', 42, null)) as test_3; - -execute q; -``` - -This is the result: - -```output - test_0 | test_1 | test_2 | test_3 ---------+----------------+----------------+---------------- - OK. | Bad: t is null | Bad: i is null | Bad: b is null -``` - -Now try this: - -```plpgsql -alter function f(text, int, boolean) strict; -execute q; -``` - -This is the new result: - -```output - test_0 | test_1 | test_2 | test_3 ---------+--------+--------+-------- - OK. | | | -``` - -{{< tip title="Always explain your reasoning carefully in the design documentation when you decide to mark a function as 'strict'." >}} -It's quite hard to imagine a plausible use case where you want silently to bypass a function's execution—especially given that a function is not supposed to have side effects. - -Yugabyte recommends that when you come across such a use case and decide to mark a function as _strict_, you explain your reasoning very carefully in the design documentation. -{{< /tip >}} - -## Parallel_mode - -The _[parallel_mode](../../../syntax_resources/grammar_diagrams/#parallel-mode)_ attribute has these allowed values: - -- _unsafe_ -- _restricted_ -- _safe_ - -The default is _unsafe_. You risk wrong results in a parallel query: - -- If you mark a function as parallel _safe_ when it should be marked _restricted_ or _unsafe_,. 
-- If you mark a function as parallel _restricted_ when it should be marked _unsafe_.
-
-In this way, the _parallel_ mode attribute is like the _volatility_ and _leakproof_ attributes. You must make the marking honestly; YSQL will not detect if you lie.
-
-#### unsafe
-
-This tells YSQL that the function can't be executed in parallel mode. The presence of such a function in a SQL statement therefore forces a serial execution plan.
-
-You must mark a function as parallel unsafe:
-
-- If it modifies any database state.
-- If it makes any changes to the transaction such as using sub-transactions.
-- If it accesses sequences or attempts to make persistent changes to settings.
-
-Notice that, because a function ought not to have side effects, you should consider using a procedure instead. If it needs to return a value, then use an _inout_ argument.
-
-#### restricted
-
-You must mark a function as parallel restricted:
-
-- If it accesses temporary tables.
-- If it accesses client connection state (for example, by using the _current_value()_ built-in function).
-- If it accesses a cursor or any miscellaneous backend-local state that the system cannot synchronize in parallel mode.
-
-For example, the _setseed()_ built-in function sets the seed for subsequent invocations of the _random()_ built-in function; but _setseed()_ cannot be executed other than by the parallelization group leader because a change made by another process would not be reflected in the leader.
-
-#### safe
-
-This tells YSQL that the function is safe to run in parallel mode without restriction.
-
-## Leakproof
-
-The default for this attribute is _not leakproof_. Only a _superuser_ may mark a function as _leakproof_.
-
-Functions and operators marked as _leakproof_ are assumed to be trustworthy, and may be executed before conditions from security policies and security barrier views. This is a component of the [Rules and Privileges](https://www.postgresql.org/docs/15/rules-privileges.html) functionality. See the account of _[create view](https://www.postgresql.org/docs/15/sql-createview.html)_ in the PostgreSQL documentation for the syntax for the _security_barrier_ attribute.
-
-The _leakproof_ attribute indicates whether or not the function has any side effects. A function is considered to be _leakproof_ only if:
-
-- It makes no changes to the state of the database.
-- It doesn't change the value of a session parameter.
-- It reveals no information about its arguments other than by its return value.
-
-For example, a function is _not leakproof_ if:
-
-- It might raise an error for any particular value for at least one of the actual arguments with which it is invoked.
-
-- It might report the value of at least one of the actual arguments with which it is invoked in an error message.
-
-Just as is the case with the _volatility_ attribute, the decision to mark a function as _leakproof_ or _not leakproof_ requires, and depends entirely upon, human judgment. YSQL cannot police the programmer's honesty.
-
-The following demonstration assumes that you can connect to some database as a regular role and as a _superuser_. This code uses the database _demo_ and connects as the regular role _u1_ and the _superuser_ role _postgres_. Change the names to suit your environment.
- -```plpgsql -\c demo u1 - -drop schema if exists s1 cascade; -create schema s1; - -create function s1.f(i in int) - returns int - language plpgsql - not leakproof -as $body$ -begin - return i*2; -end; -$body$; - -create view s1.f_leakproof_status(leakproof) as -select - proleakproof::text -from pg_proc -where - pronamespace::regnamespace::text = 's1' and - proname::text = 'f' and - prokind = 'f'; - -select leakproof from s1.f_leakproof_status; -``` - -This is the result: - -```output - leakproof ------------ - false -``` - -Now create a _security invoker_ procedure to mark _s1.f(int)_ as _leakproof_ and execute it: - -```plpgsql -create procedure s1.mark_f_leakproof(result in out text) - security invoker - language plpgsql -as $body$ -begin - alter function s1.f(int) leakproof; - result := '"s1.f(int)" is now marked as leakproof'; -exception when insufficient_privilege then - result := 'Only superuser can define a leakproof function.'; -end; -$body$; - -call s1.mark_f_leakproof(''); -``` - -This is the result, as expected: - -```output - result -------------------------------------------------- - Only superuser can define a leakproof function. -``` - -Notice how procedure _s1.mark_f_leakproof(text)_ is designed: - -- It is set to be _security invoker_ so that it will act with the privileges of the invoking role—in this demonstration either the regular role _u1_ or the _superuser_ role _postgres_. This means that its power depends upon knowing the password for the _postgres_ role. -- It is created as a procedure, and not as a function, even though it needs to return a success message, because procedures _do_ something—but functions simply name a computed value that will be used in an expression and ought not to have (regular) side effects. - -Now connect as _postgres_ and execute _mark_f_leakproof()_ again: - -```plpgsql -\c demo postgres -call s1.mark_f_leakproof(''); -``` - -This is the new result, again as expected: - -```output - result ----------------------------------------- - "s1.f(int)" is now marked as leakproof -``` - -Re-connect as _u1_ and check the _leakproof_ status: - -```plpgsql -\c demo u1 -select leakproof from s1.f_leakproof_status; -``` - -This is the new result: - -```output - leakproof ------------ - true -``` - -## Cost and rows - -Each of these attributes takes a positive integer argument. They provide information for the planner to use. - -- The _cost_ attribute provides an estimate for execution cost for the function, in units of _cpu_operator_cost_. If the function returns a set, this is the cost per returned row. If the _cost_ is not specified, then _1 unit_ is assumed for C-language and internal functions, and _100 units_ is assumed for functions in all other languages. Larger values cause the planner to try to avoid evaluating the function more often than necessary. (This suggests that the function should be marked with _stable_ or _immutable_ volatility.) - -- The _rows_ attribute provides an estimate of the number of rows that the planner should expect the function to return. This is allowed only when the function is declared to return a set. The default assumption is _1000 rows_. 
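For illustration, here is a minimal sketch (the function name is made up and the attribute values are arbitrary) that sets both attributes with _alter function_ and then inspects them in the _pg_proc_ catalog:

```plpgsql
drop function if exists first_n(int) cascade;

create function first_n(n in int)
  returns setof int
  immutable
  language sql
as $body$
  select generate_series(1, n);
$body$;

-- Tell the planner that each call is cheap and typically returns about ten rows.
alter function first_n(int) cost 50 rows 10;

select procost, prorows from pg_proc where proname = 'first_n';
```

The final query should report the values that you just set, _50_ and _10_.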
diff --git a/docs/content/preview/architecture/_index.md b/docs/content/preview/architecture/_index.md deleted file mode 100644 index 7f0296a81686..000000000000 --- a/docs/content/preview/architecture/_index.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -title: Architecture -headerTitle: Architecture -linkTitle: Architecture -description: Learn about the YugabyteDB architecture, including query, transactions, sharding, replication, and storage layers. -headcontent: Internals of query, transactions, sharding, replication, and storage layers -aliases: - - /architecture/layerered-architecture/ -menu: - preview: - identifier: architecture - parent: reference - weight: 1050 -type: indexpage ---- - -YugabyteDB is a distributed database that seamlessly combines the principles of distributed systems, where multiple machines collaborate, with the familiar concepts of traditional databases, where data is organized in tables with standard interfaces for reading and writing data. - -Unlike traditional centralized databases, YugabyteDB is designed to manage and process data across multiple nodes or servers, ensuring resiliency, consistency, high availability, scalability, fault tolerance, and other [design goals](design-goals/). - -{{}} -Check out YugabyteDB [key concepts](./key-concepts) for your quick reference. -{{}} - -## Layered architecture - -In general, operations in YugabyteDB are split logically into 2 layers, the query layer and the storage layer. The query layer is responsible for handling user requests and sending the requests to the right data. The storage layer is responsible for optimally storing the data on disk and managing replication and consistency. - -![YugabyteDB Layered Architecture](/images/architecture/layered-architecture.png) - -## Query layer - -For operating (CRUD) on the data that is split and stored across multiple machines, YugabyteDB provides two APIs, YSQL and YCQL. The query layer takes the user query submitted via the API and sends or fetches data to and from the right set of tablets. - -{{}} -To understand how the query layer is designed, see [Query layer](query-layer/). -{{}} - -## Storage layer - -The tablet data is optimally stored and managed by DocDB, a document store that has been built on top of RocksDB for higher performance and persistence. - -{{}} -To understand how data storage works in YugabyteDB, see [DocDB](docdb/). -{{}} - -## Sharding - -YugabyteDB splits table data into smaller pieces called tablets so that the data can be stored in parts across multiple machines. The mapping of a row to a tablet is deterministic and this process is known as sharding. - -{{}} -To learn more about the various sharding schemes, see [Sharding](docdb-sharding/). -{{}} - -## Replication - -Tablets are replicated for resiliency, high availability, and fault tolerance. Each tablet has a leader that is responsible for consistent reads and writes to the data of the tablet and a few followers. The replication is done using the Raft protocol to ensure consistency of data across the leader and followers. - -{{}} -To understand how replication works, see [Replication](docdb-replication/). -{{}} - -## Transactions - -Transactions are a set of operations (CRUD) that are executed atomically with the option to roll back all actions if any operation fails. - -{{}} -To understand how transactions work in YugabyteDB, see [Transactions](transactions/). -{{}} - -## Master server - -The master service acts a catalog manager and cluster orchestrator, and manages many background tasks. 
- -{{}} -For more details, see [YB-Master](./yb-master). -{{}} - -## TServer - -YugabyteDB splits table data into tablets. These tablets are maintained and managed on each node by the TServer. - -{{}} -For more details, see [YB-TServer](./yb-tserver). -{{}} diff --git a/docs/content/preview/architecture/docdb-replication/_index.md b/docs/content/preview/architecture/docdb-replication/_index.md deleted file mode 100644 index 1fa2e34d446d..000000000000 --- a/docs/content/preview/architecture/docdb-replication/_index.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: DocDB replication layer -headerTitle: DocDB replication layer -linkTitle: Replication -description: Learn how synchronous and asynchronous replication work in DocDB, including advanced features like xCluster replication and read replicas. -headcontent: Learn how synchronous and asynchronous replication work in DocDB. -menu: - preview: - identifier: architecture-docdb-replication - parent: architecture - weight: 800 -type: indexpage ---- - -Table data is split into [tablets](../key-concepts#tablet) and managed by [DocDB](../docdb). By default, each tablet is synchronously replicated as per the [replication factor](../key-concepts#replication-factor-rf), using the [Raft](./raft) algorithm across various nodes to ensure data consistency, fault tolerance, and high availability. The replication layer is a critical component that determines how data is replicated, synchronized, and made consistent across the distributed system. In this section, you can explore the key concepts and techniques used in the replication layer of YugabyteDB. - -## Raft consensus protocol - -At the heart of YugabyteDB is Raft consensus protocol that ensures the replicated data remains consistent across all the nodes. Raft is designed to be a more understandable alternative to the complex Paxos protocol. It works by electing a leader node that is responsible for managing the replicated log and coordinating the other nodes. - -{{}} -To understand the different concepts in the consensus protocol, see [Raft](./raft). -{{}} - -## Synchronous Replication - -YugabyteDB ensures that all writes are replicated to a majority of the nodes before the write is considered complete and acknowledged to the client. This provides the highest level of data consistency, as the data is guaranteed to be durable and available on multiple nodes. YugabyteDB's synchronous replication architecture is inspired by [Google Spanner](https://research.google.com/archive/spanner-osdi2012.pdf). - -{{}} -To understand how replication works, see [Synchronous replication](./replication). -{{}} - -## xCluster - -Asynchronous replication, on the other hand, does not wait for writes to be replicated to all the nodes before acknowledging the client. Instead, writes are acknowledged immediately, and the replication process happens in the background. Asynchronous replication provides lower latency for write operations, as the client does not have to wait for the replication to complete. However, it comes with the trade-off of potentially lower consistency across universes, as there may be a delay before the replicas are fully synchronized. - -In YugabyteDB, you can use xCluster to set up asynchronous replication between 2 different distant [universes](../key-concepts#universe) either in a unidirectional or bi-directional manner. - -{{}} -To understand how asynchronous replication between 2 universes works, see [xCluster](./async-replication). 
-{{}}
-
-## Read replica
-
-Read replicas are effectively in-universe asynchronous replicas: an optional cluster that you can add to an existing universe to help improve read latency for users located far away from your [primary cluster](../key-concepts#primary-cluster).
-
-{{}}
-To understand how read replicas work, see [Read replicas](./read-replicas).
-{{}}
-
-## Change Data Capture (CDC)
-
-CDC is a technique used to track and replicate changes to the data. CDC systems monitor the database's transaction log and capture any changes that occur. These changes are then propagated to external systems or replicas using connectors.
-
-CDC is particularly beneficial in scenarios where real-time data synchronization is required, such as data warehousing, stream processing, and event-driven architectures. It allows the replicated data to be kept in sync without the need for full table replication, which can be more efficient and scalable.
-
-{{}}
-To understand how CDC works, see [CDC](./change-data-capture).
-{{}}
 diff --git a/docs/content/preview/architecture/docdb-replication/change-data-capture.md deleted file mode 100644 index ad10144db44b..000000000000 --- a/docs/content/preview/architecture/docdb-replication/change-data-capture.md +++ /dev/null @@ -1,82 +0,0 @@
----
-title: Architecture for CDC using gRPC protocol
-headerTitle: CDC using gRPC protocol
-linkTitle: CDC using gRPC protocol
-description: Learn how YugabyteDB supports asynchronous replication of data changes (inserts, updates, and deletes) to external databases or applications.
-headContent: Asynchronous replication of data changes (inserts, updates, and deletes) to external databases or applications
-tags:
-  feature: early-access
-aliases:
-  - /preview/architecture/change-data-capture/
-menu:
-  preview:
-    parent: architecture-docdb-replication
-    identifier: architecture-docdb-replication-cdc
-    weight: 600
-type: docs
----
-
-Change data capture (CDC) in YugabyteDB provides technology to ensure that any changes in data due to operations such as inserts, updates, and deletions are identified, captured, and made available for consumption by applications and other tools.
-
-## Architecture
-
-Every YB-TServer has a `CDC service` that is stateless. The main APIs provided by the CDC service are the following:
-
-- `createCDCSDKStream` API for creating the stream on the database.
-- `getChangesCDCSDK` API that can be used by the client to get the latest set of changes.
-
-![Stateless CDC Service](/images/architecture/stateless_cdc_service.png)
-
-{{}}
-
-See [Change data capture](../../../additional-features/change-data-capture/) for more details and limitations.
-
-{{}}
-
-## CDC streams
-
-YugabyteDB automatically splits user tables into multiple shards (also called tablets) using either a hash- or range-based strategy. The primary key of each row uniquely determines the tablet in which that row is stored.
-
-Each tablet has its own WAL file. The WAL is not in-memory; it is persisted on disk. Each WAL preserves the order in which transactions (or changes) happened. The hybrid timestamp, operation ID, and additional metadata about the transaction are also preserved.
-
-![How does CDC work](/images/explore/cdc-overview-work2.png)
-
-YugabyteDB normally purges WAL segments after some period of time. This means that the connector does not have the complete history of all changes that have been made to the database.
Therefore, when the connector first connects to a particular YugabyteDB database, it starts by performing a consistent snapshot of each of the database schemas. - -The YugabyteDB Debezium connector captures row-level changes in the schemas of a YugabyteDB database. The first time it connects to a YugabyteDB cluster, the connector takes a consistent snapshot of all schemas. After that snapshot is complete, the connector continuously captures row-level changes that insert, update, and delete database content, and that were committed to a YugabyteDB database. - -![How does CDC work](/images/explore/cdc-overview-work.png) - -The core primitive of CDC is the _stream_. Streams can be enabled and disabled on databases. You can specify which tables to include or exclude. Every change to a watched database table is emitted as a record in a configurable format to a configurable sink. Streams scale to any YugabyteDB cluster independent of its size and are designed to impact production traffic as little as possible. - -Creating a new CDC stream returns a stream UUID. This is facilitated via the [yb-admin](../../../admin/yb-admin/#change-data-capture-cdc-commands) tool. A stream ID is created first, per database. You configure the maximum batch side in YugabyteDB, while the polling frequency is configured on the connector side. - -Connector tasks can consume changes from multiple tablets. At least once delivery is guaranteed. In turn, connector tasks write to the Kafka cluster, and tasks don't need to match Kafka partitions. Tasks can be independently scaled up or down. - -The connector produces a change event for every row-level insert, update, and delete operation that was captured, and sends change event records for each table in a separate Kafka topic. Client applications read the Kafka topics that correspond to the database tables of interest, and can react to every row-level event they receive from those topics. For each table, the default behavior is that the connector streams all generated events to a separate Kafka topic for that table. Applications and services consume data change event records from that topic. All changes for a row (or rows in the same tablet) are received in the order in which they happened. A checkpoint per stream ID and tablet is updated in a state table after a successful write to Kafka brokers. - -## CDC guarantees - -CDC in YugabyteDB provides technology to ensure that any changes in data due to operations (such as inserts, updates, and deletions) are identified, captured, and automatically applied to another data repository instance, or made available for consumption by applications and other tools. CDC provides the following guarantees. - -### Per-tablet ordered delivery - -All data changes for one row or multiple rows in the same tablet are received in the order in which they occur. Due to the distributed nature of the problem, however, gRPC replication does not guarantee order across tablets. - -Consider the following scenario: - -- Two rows are being updated concurrently. -- These two rows belong to different tablets. -- The first row `row #1` was updated at time `t1`, and the second row `row #2` was updated at time `t2`. - -In this case, it is possible for CDC to push the later update corresponding to `row #2` change to Kafka before pushing the earlier update, corresponding to `row #1`. - -### At-least-once delivery - -Updates for rows are pushed at least once. 
With the at-least-once delivery, you never lose a message, however the message might be delivered to a CDC consumer more than once. This can happen in case of a tablet leader change, where the old leader already pushed changes to Kafka, but the latest pushed `op id` was not updated in the CDC metadata. - -For example, a CDC client has received changes for a row at times `t1` and `t3`. It is possible for the client to receive those updates again. - -### No gaps in change stream - -When you have received a change for a row for timestamp `t`, you do not receive a previously unseen change for that row from an earlier timestamp. This guarantees that receiving any change implies that all earlier changes have been received for a row. diff --git a/docs/content/preview/architecture/docdb-replication/read-replicas.md b/docs/content/preview/architecture/docdb-replication/read-replicas.md deleted file mode 100644 index 8e4a54db03f2..000000000000 --- a/docs/content/preview/architecture/docdb-replication/read-replicas.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Read replicas in YugabyteDB -headerTitle: Read replicas -linkTitle: Read replicas -description: Learn about read replicas in YugabyteDB. -headContent: Replicate data asynchronously to one or more read replica clusters -menu: - preview: - identifier: architecture-docdb-replication-read-replicas - parent: architecture-docdb-replication - weight: 400 -type: docs ---- - -In addition to the core distributed consensus-based replication, DocDB extends Raft to add read replicas (also known as observer nodes) that do not participate in writes but get a timeline consistent copy of the data in an asynchronous manner. - -Read replicas are a read-only extension to the primary data in the universe. With read replicas, the primary data of the universe is replicated across multiple zones in one region, or across nearby regions. Read replicas do not add to the write latencies as the write does not synchronously replicate data to them. Instead, the data is replicated to read replicas asynchronously. - -Nodes in remote data centers can be added in read-only mode. This is typically used in cases where latency of doing a distributed consensus-based write is not tolerable for some workloads. - -## Replication factor - -Every YugabyteDB universe contains a primary data cluster, and one or more read replica clusters. Thus, each read replica cluster can independently have its own replication factor. - -The replication factor of a read replica cluster can be an even number as well. For example, a read replica cluster with a replication factor of 2 is perfectly valid. This is the case because read replicas do not participate in Raft consensus operation, and therefore an odd number of replicas is not required for correctness. - -## Writing to read replicas - -An application can send write requests to read replicas, but these write requests are internally redirected to the source of truth. This is possible because the read replicas are aware of the topology of the universe. - -## Schema changes - -Because read replicas are a Raft replication-level extension, the schema changes are transparently applied to these replicas. There is no need to execute DDL operations separately on the read replica cluster. - -## Read replicas vs. eventual consistency - -The read-only node (or timeline-consistent node) is still strictly better than eventual consistency, because with the latter the application's view of the data can move back and forth in time and is hard to program. 
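As an illustration, a YSQL session connected to a node in a read replica cluster opts in to these timeline-consistent reads using the follower-read session settings (a sketch: the table name is made up, and the parameter name assumes the follower reads feature available in current YSQL):

```sql
-- In a session connected to a read replica node (sketch).
set session characteristics as transaction read only;
set yb_read_from_followers = true;

-- Reads are now served from the local replica's timeline-consistent copy.
select * from orders where customer_id = 42;
```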
- -## Learn more - -- [Setup read replicas](../../../explore/multi-region-deployments/read-replicas-ysql) diff --git a/docs/content/preview/architecture/docdb-replication/replication.md b/docs/content/preview/architecture/docdb-replication/replication.md deleted file mode 100644 index 7a7f4624f5f1..000000000000 --- a/docs/content/preview/architecture/docdb-replication/replication.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: Replication in DocDB -headerTitle: Synchronous replication -linkTitle: Synchronous -description: Learn how YugabyteDB uses the Raft consensus in DocDB to replicate data across multiple independent fault domains like nodes, zones, regions, and clouds. -headContent: Synchronous replication using the Raft consensus protocol -aliases: - - /preview/architecture/concepts/docdb/replication/ -menu: - preview: - identifier: architecture-docdb-replication-default - parent: architecture-docdb-replication - weight: 200 -type: docs ---- - -Using the [Raft distributed consensus protocol](../raft), DocDB automatically replicates data synchronously across the primary cluster in order to survive failures while maintaining data consistency and avoiding operator intervention. - -## Replication factor - -YugabyteDB replicates data across [fault domains](../../key-concepts#fault-domain) (which, depending on the deployment, could be nodes, availability zones, or regions) in order to tolerate faults. The replication factor (RF) is the number of copies of data in a YugabyteDB cluster. - -## Fault tolerance - -The fault tolerance (FT) of a YugabyteDB cluster is the maximum number of fault domain failures it can survive while continuing to preserve correctness of data. Fault tolerance and replication factor are correlated as follows: - -* To achieve a FT of `f` fault domains, the primary cluster has to be configured with a RF of at least `2f + 1`. - -The following diagram shows a cluster with FT 1. Data is replicated across 3 nodes, and the cluster can survive the failure of one fault domain. To make the cluster able to survive the failure of a zone or region, you would place the nodes in different zones or regions. - -![Raft group](/images/architecture/replication/raft-group.png) - -To survive the outage of 2 fault domains, a cluster needs at least 2 * 2 + 1 fault domains; that is, an RF of 5. With RF >= 5, if 2 fault domains are offline, the remaining 3 fault domains can continue to serve reads and writes without interruption. - -| Replication factor | Fault tolerance | Can survive failure of | -| :--- | :--- | :--- | -| 1 or 2 | 0 | 0 fault domains | -| 3 or 4 | 1 | 1 fault domain | -| 5 or 6 | 2 | 2 fault domains | -| 7 or 8 | 3 | 3 fault domains | - -## Tablet peers - -Replication of data in DocDB is achieved at the level of tablets, using tablet peers, with each table sharded into a set of tablets, as demonstrated in the following diagram: - -![Tablets in a table](/images/architecture/replication/tablets_in_a_docsb_table.png) - -Each tablet comprises of a set of tablet peers, each of which stores one copy of the data belonging to the tablet. There are as many tablet peers for a tablet as the replication factor, and they form a Raft group. The tablet peers are hosted on different nodes to allow data redundancy to protect against node failures. The replication of data between the tablet peers is strongly consistent. - -The following diagram shows three tablet peers that belong to a tablet called `tablet 1`. 
The tablet peers are hosted on different YB-TServers and form a Raft group for leader election, failure detection, and replication of the write-ahead logs. - -![Raft Replication](/images/architecture/raft_replication.png) - -## Raft replication - -As soon as a tablet initiates, it elects one of the tablet peers as the tablet leader using the [Raft](../raft) protocol. The tablet leader becomes responsible for processing user-facing write requests by translating the user-issued writes into the document storage layer of DocDB. In addition, the tablet leader replicates among the tablet peers using Raft to achieve strong consistency. Setting aside the tablet leader, the remaining tablet peers of the Raft group are called tablet followers. - -The set of DocDB updates depends on the user-issued write, and involves locking a set of keys to establish a strict update order, and optionally reading the older value to modify and update in case of a read-modify-write operation. The Raft log is used to ensure that the database state-machine of a tablet is replicated amongst the tablet peers with strict ordering and correctness guarantees even in the face of failures or membership changes. This is essential to achieving strong consistency. - -After the Raft log is replicated to a majority of tablet-peers and successfully persisted on the majority, the write is applied into the DocDB document storage layer and is subsequently available for reads. After the write is persisted on disk by the document storage layer, the write entries can be purged from the Raft log. This is performed as a controlled background operation without any impact to the foreground operations. - -## Multi-zone deployment - -The replicas of data can be placed across multiple [fault domains](../../key-concepts#fault-domain). The following examples of a multi-zone deployment with three zones and the replication factor assumed to be 3 demonstrate how replication across fault domains is performed in a cluster. - -In the case of a multi-zone deployment, the data in each of the tablets in a node is replicated across multiple zones using the Raft consensus algorithm. All the read and write queries for the rows that belong to a given tablet are handled by that tablet's leader, as per the following diagram: - -![Replication across zones](/images/architecture/replication/raft-replication-across-zones.png) - -As a part of the Raft replication, each tablet peer first elects a tablet leader responsible for serving reads and writes. The distribution of tablet leaders across different zones is determined by a user-specified data placement policy, which, in the preceding scenario, ensures that in the steady state, each of the zones has an equal number of tablet leaders. The following diagram shows how the tablet leaders are dispersed: - -![Tablet leader placement](/images/architecture/replication/optimal-tablet-leader-placement.png) - -{{}} -Tablet leaders are balanced across **zones** and the **nodes** in a zone. -{{}} - -## Tolerating a zone outage - -As soon as a zone outage occurs, YugabyteDB assumes that all nodes in that zone become unavailable simultaneously. This results in one-third of the tablets (which have their tablet leaders in the zone that just failed) not being able to serve any requests. The other two-thirds of the tablets are not affected. For the affected one-third, YugabyteDB automatically performs a failover to instances in the other two zones. 
Once again, the tablets being failed over are distributed across the two remaining zones evenly, as per the following diagram: - -![Automatic failover](/images/architecture/replication/automatic-failover-zone-outage.png) - -{{}} -Failure of **followers** has no impact on reads and writes. Only the tablet **leaders** serve reads and writes. -{{}} - -## RPO and RTO on zone outage - -The recovery point objective (RPO) for each of these tablets is 0, meaning no data is lost in the failover to another zone. The recovery time objective (RTO) is 3 seconds, which is the time window for completing the failover and becoming operational out of the new zones, as per the following diagram: - -![RPO vs RTO](/images/architecture/replication/rpo-vs-rto-zone-outage.png) - -## Follower reads - -Only the tablet leader can process user-facing write and read requests. Note that while this is the case for strongly consistent reads, YugabyteDB offers reading from followers with relaxed guarantees, which is desired in [some deployment models](../../../develop/build-global-apps/follower-reads/). All other tablet peers are called followers and merely replicate data. They are available as hot standbys that can take over quickly in case the leader fails. diff --git a/docs/content/preview/architecture/docdb-sharding/_index.md b/docs/content/preview/architecture/docdb-sharding/_index.md deleted file mode 100644 index dbfbf96949db..000000000000 --- a/docs/content/preview/architecture/docdb-sharding/_index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: DocDB sharding layer -headerTitle: DocDB sharding layer -linkTitle: Sharding -description: Learn about sharding strategies, hash and range sharding, colocated tables, and table splitting. -headcontent: Learn about sharding strategies, hash and range sharding, colocated tables, and table splitting. -menu: - preview: - identifier: architecture-docdb-sharding - parent: architecture - weight: 700 -type: indexpage ---- - -A distributed SQL database needs to automatically split the data in a table and distribute it across nodes. This is known as data sharding and it can be achieved through different strategies, each with its own tradeoffs. YugabyteDB's sharding architecture is inspired by Google Spanner. - -## Sharding - -YugabyteDB splits table data into smaller pieces called [tablets a.k.a shards](../key-concepts/#tablet). Sharding is the process of mapping of a row of a table to a shard. Sharding helps in scalability and geo-distribution by horizontally partitioning data. These shards are distributed across multiple server nodes (containers, virtual machines, bare-metal) in a shared-nothing architecture. The application interacts with a SQL table as one logical unit and remains agnostic to the physical placement of the shards. DocDB supports range and hash sharding natively. - -{{}} -To know more about the different sharding strategies and how they work, see [Sharding strategies](sharding/). -{{}} - -## Tablet splitting - -As table data grows, the size of tablets increase. Once a tablet reaches a threshold size, it automatically splits into two. These 2 new tablets can now be placed in other nodes to keep the load on the system balanced. Tablet splitting is one of the foundations of [scaling](../../explore/linear-scalability). - -{{}} -To understand how and when tablets split, see [Tablet splitting](tablet-splitting/). 
-{{}} diff --git a/docs/content/preview/architecture/docdb/_index.md b/docs/content/preview/architecture/docdb/_index.md deleted file mode 100644 index 552ad951e473..000000000000 --- a/docs/content/preview/architecture/docdb/_index.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: DocDB storage layer -headerTitle: DocDB storage layer -linkTitle: DocDB - Storage layer -description: Learn about the persistent storage layer of DocDB. -aliases: - - /architecture/concepts/docdb/ - - /preview/docdb/persistence/ -headcontent: The document store responsible for transactions, sharding, replication, and persistence -menu: - preview: - identifier: docdb - parent: architecture - weight: 600 -type: indexpage ---- - -DocDB is the underlying document storage engine of YugabyteDB and is built on top of a highly customized and optimized version of [RocksDB](http://rocksdb.org/), a [log-structured merge tree (LSM)](./lsm-sst)-based key-value store. Several enhancements and customizations have been made on top of the vanilla RocksDB to make DocDB highly performant and scalable. DocDB in essence manages multiple RocksDB instances which are created one per tablet. - -![DocDB Document Storage Layer](/images/architecture/docdb-rocksdb.png) - -## Data model - -DocDB is a persistent key-value store, which means that data is stored and retrieved using unique keys. It supports ordered data operations, allowing efficient range queries and iteration over keys. The keys are designed for fast lookups and efficient range scans. - -{{}} -To understand more about how row data is stored as keys and values in DocDB, see [DocDB data model](./data-model). -{{}} - -## Storage engine - -DocDB is a log-structured merge-tree (LSM) based storage engine. This design is optimized for high write throughput and efficient storage utilization. Data is stored in multiple SSTs (Sorted String Tables) to store key-value data on disk. It is designed to be efficient for both sequential and random access patterns. DocDB periodically compacts data by merging and sorting multiple SST files into a smaller set of files. This process helps to maintain a consistent on-disk format and reclaim space from obsolete data. - -{{}} -To understand more about how LSM tree stores data in SSTs, see [LSM and SST](./lsm-sst). -{{}} - -## Performance - -DocDB is written in C++ and is designed to be highly performant on Linux. - -{{}} -To understand more about how DocDB enhances RocksDB, see [Performance](./performance). -{{}} diff --git a/docs/content/preview/architecture/docdb/performance.md b/docs/content/preview/architecture/docdb/performance.md deleted file mode 100644 index b1945915efb0..000000000000 --- a/docs/content/preview/architecture/docdb/performance.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: DocDB performance enhancements to RocksDB -headerTitle: Performance -linkTitle: Performance -description: Learn how DocDB enhances RocksDB for scale and performance. -aliases: - - /preview/architecture/concepts/docdb/performance/ -menu: - preview: - identifier: docdb-performance - parent: docdb - weight: 400 -type: docs ---- - -DocDB is built on a customized version of [RocksDB](http://rocksdb.org/), a [log-structured merge tree (LSM)](../lsm-sst)-based key-value store. A tremendous number of optimizations have been implemented to make RocksDB work as a critical component of a scalable distributed database. Let's go over some of the most significant changes. 
- -## Efficient modeling of documents - -The goal of one of the enhancements is to implement a flexible data model on top of a key-value store, as well as to implement efficient operations on this data model such as the following: - -* Fine-grained updates to a part of the row or collection without incurring a read-modify-write penalty of the entire row or collection. -* Deleting or overwriting a row, collection, or object at an arbitrary nesting level without incurring a read penalty to determine what specific set of key-value pairs need to be deleted. -* Enforcing row- and object-level TTL-based expiration. - -A tighter coupling into the read and compaction layers of the underlying RocksDB key-value store was needed. RocksDB is used as an append-only store, and operations (such as row or collection delete) are modeled as an insert of a special delete marker. This allows deleting an entire subdocument efficiently by adding one key-value pair to RocksDB. Read hooks automatically recognize these markers and suppress expired data. Expired values within the subdocument are cleaned up and garbage-collected by customized compaction hooks. - -## Raft vs. RocksDB WAL logs - -DocDB uses Raft for replication. Changes to the distributed system are already recorded or journaled as part of Raft logs. When a change is accepted by a majority of peers, it is applied to each tablet peer’s DocDB, but the additional write-ahead logging (WAL) mechanism in RocksDB was unnecessary and would add overhead. For correctness, in addition to disabling the WAL mechanism in RocksDB, YugabyteDB tracks the Raft sequence ID up to which data has been flushed from RocksDB’s memtables to SSTable files. This ensures that the Raft WAL logs can be correctly garbage-collected. It also allows to replay the minimal number of records from Raft WAL logs on a server crash or restart. - -## MVCC at a higher layer - -Multi-version concurrency control (MVCC) in DocDB is done at a higher layer and does not use the MVCC mechanism of RocksDB. - -The mutations to records in the system are versioned using hybrid timestamps maintained at the YBase layer. As a result, the notion of MVCC as implemented in RocksDB using sequence IDs was not necessary and would only add overhead. YugabyteDB does not use RocksDB’s sequence IDs; instead, it uses hybrid timestamps that are part of the encoded key to implement MVCC. - -## Load balancing across disks - -When multiple disks are available for storage, DocDB will distribute the SST and WAL files of various tablets of tables evenly across the attached disks on a per-table basis. This load distribution (also known as striping) ensures that each disk handles an even amount of load for each table. - -## Backups and snapshots - -Backups and snapshots needed to be higher-level operations that take into consideration data in DocDB, as well as in the Raft logs to obtain a consistent cut of the state of the system. - -## Data model-aware bloom filters - -The keys stored by DocDB consist of a number of components, where the first component is a document key, followed by a few scalar components, and finally followed by a timestamp (sorted in reverse order). - -The bloom filter needs to be aware of what components of the key should be added to the bloom, so that only the relevant SSTable files in the LSM store are being searched during a read operation. - -In a traditional key-value store, range scans do not make use of bloom filters because exact keys that fall in the range are unknown. 
However, a data-model-aware bloom filter was implemented, where range scans within keys that share the same hash component can also benefit from bloom filters. For example, a scan to get all the columns within a row or all the elements of a collection can also benefit from bloom filters.
-
-## Range query optimizations
-
-The ordered (or range) components of the compound keys in DocDB often have a natural order. For example, it may be an `int` that represents a message ID (for a messaging application) or a timestamp (for an IoT time series). By keeping hints with each SSTable file in the LSM store about the minimum and maximum values for these components of the key, range queries can intelligently prune away the lookup of irrelevant SSTable files during the read operation.
-
-Consider the following example:
-
-```sql
-SELECT message_txt
-  FROM messages
-WHERE user_id = 17
-  AND message_id > 50
-  AND message_id < 100;
-```
-
-The following example illustrates a time series application:
-
-```sql
-SELECT metric_value
-  FROM metrics
-WHERE metric_name = 'system.cpu'
-  AND metric_timestamp < ?
-  AND metric_timestamp > ?;
-```
-
-## Server-global block cache
-
-DocDB uses a shared block cache across all the RocksDB instances of the tablets hosted by a YB-TServer. This maximizes the use of memory resources and avoids creating silos of cache that each need to be sized accurately for different user tables. Because the cache is allocated and sized at the server level, it also reduces the memory usage per tablet.
-
-## Server-global memstore limits
-
-While per-memstore flush sizes can be configured, in practice, because the number of memstores may change over time as users create new tables or tablets of a table move between servers, the storage engine was enhanced to enforce a global memstore threshold. When such a threshold is reached, selection of which memstore to flush takes into account which memstores carry the oldest records (determined by hybrid timestamps) and, therefore, are holding up Raft logs and preventing them from being garbage-collected.
-
-## Scan-resistant block cache
-
-DocDB enhances the default block cache implementation to be scan-resistant. The motivation was to prevent operations such as long-running scans (for example, due to an occasional large query or background Spark jobs) from polluting the entire cache with poor-quality data and wiping out useful data.
 diff --git a/docs/content/preview/architecture/key-concepts.md deleted file mode 100644 index b944df92962a..000000000000 --- a/docs/content/preview/architecture/key-concepts.md +++ /dev/null @@ -1,222 +0,0 @@
----
-title: Key concepts
-headerTitle: Key concepts
-linkTitle: Key concepts
-description: Learn about the Key concepts in YugabyteDB
-headcontent: Glossary of key concepts
-aliases:
-  - /preview/architecture/concepts
-  - /preview/architecture/concepts/universe
-  - /preview/architecture/concepts/single-node/
-  - /preview/key-concepts/
-menu:
-  preview:
-    identifier: architecture-concepts-universe
-    parent: reference
-    weight: 10
-type: docs
----
-
-## ACID
-
-ACID stands for Atomicity, Consistency, Isolation, and Durability. These are a set of properties that guarantee that database transactions are processed reliably.
-
-- Atomicity: All the work in a transaction is treated as a single atomic unit - either all of it is performed or none of it is.
-- Consistency: A completed transaction leaves the database in a consistent internal state.
This can either be all the operations in the transactions succeeding or none of them succeeding. -- Isolation: This property determines how and when changes made by one transaction become visible to the other. For example, a serializable isolation level guarantees that two concurrent transactions appear as if one executed after the other (that is, as if they occur in a completely isolated fashion). -- Durability: The results of the transaction are permanently stored in the system. The modifications must persist even in the instance of power loss or system failures. - -YugabyteDB provides ACID guarantees for all [transactions](#transaction). - -## CDC - Change data capture - -CDC is a software design pattern used in database systems to capture and propagate data changes from one database to another in real-time or near real-time. YugabyteDB supports transactional CDC guaranteeing changes across tables are captured together. This enables use cases like real-time analytics, data warehousing, operational data replication, and event-driven architectures. {{}} - -## Cluster - -A cluster is a group of [nodes](#node) on which YugabyteDB is deployed. The table data is distributed across the various [nodes](#node) in the cluster. Typically used as [*Primary cluster*](#primary-cluster) and [*Read replica cluster*](#read-replica-cluster). - -{{}} -Sometimes the term *cluster* is used interchangeably with the term *universe*. However, the two are not always equivalent, as described in [Universe](#universe). -{{}} - -## DocDB - -DocDB is the underlying document storage engine of YugabyteDB and is built on top of a highly customized and optimized verison of [RocksDB](http://rocksdb.org/). {{}} - -## Fault domain - -A fault domain is a potential point of failure. Examples of fault domains would be nodes, racks, zones, or entire regions. {{}} - -## Fault tolerance - -YugabyteDB achieves resiliency by replicating data across fault domains using the Raft consensus protocol. The [fault domain](#fault-domain) can be at the level of individual nodes, availability zones, or entire regions. - -The fault tolerance determines how resilient the cluster is to domain (that is, node, zone, or region) outages, whether planned or unplanned. Fault tolerance is achieved by adding redundancy, in the form of additional nodes, across the fault domain. Due to the way the Raft protocol works, providing a fault tolerance of `ft` requires replicating data across `2ft + 1` domains. This number is referred to as the [replication factor](#replication-factor-rf). For example, to survive the outage of 2 nodes, a cluster needs 2 * 2 + 1 nodes; that is, a replication factor of 5. While the 2 nodes are offline, the remaining 3 nodes can continue to serve reads and writes without interruption. - -## Follower reads - -Normally, only the [tablet leader](#tablet-leader) can process user-facing write and read requests. Follower reads allow you to lower read latencies by serving reads from the tablet followers. This is similar to reading from a cache, which can provide more read IOPS with low latency. The data might be slightly stale, but is timeline-consistent, meaning no out of order data is possible. - -Follower reads are particularly beneficial in applications that can tolerate staleness. For instance, in a social media application where a post gets a million likes continuously, slightly stale reads are acceptable, and immediate updates are not necessary because the absolute number may not really matter to the end-user reading the post. 
In such cases, a slightly older value from the closest replica can achieve improved performance with lower latency. Follower reads are required when reading from [read replicas](#read-replica-cluster). {{}} - -## Hybrid time - -Hybrid time/timestamp is a monotonically increasing timestamp derived using [Hybrid Logical clock](../transactions/transactions-overview/#hybrid-logical-clocks). Multiple aspects of YugabyteDB's transaction model are based on hybrid time. {{}} - -## Isolation levels - -[Transaction](#transaction) isolation levels define the degree to which transactions are isolated from each other. Isolation levels determine how changes made by one transaction become visible to other concurrent transactions. {{}} - -{{}} -YugabyteDB offers 3 isolation levels - [Serializable](../../explore/transactions/isolation-levels/#serializable-isolation), [Snapshot](../../explore/transactions/isolation-levels/#snapshot-isolation) and [Read committed](../../explore/transactions/isolation-levels/#read-committed-isolation) - in the {{}} API and one isolation level - [Snapshot](../../develop/learn/transactions/acid-transactions-ycql/) - in the {{}} API. -{{}} - -## Leader balancing - -YugabyteDB tries to keep the number of leaders evenly distributed across the [nodes](#node) in a cluster to ensure an even distribution of load. - -## Leader election - -Amongst the [tablet](#tablet) replicas, one tablet is elected [leader](#tablet-leader) as per the [Raft](../docdb-replication/raft) protocol. {{}} - -## Master server - -The [YB-Master](../yb-master/) service is responsible for keeping system metadata, coordinating system-wide operations, such as creating, altering, and dropping tables, as well as initiating maintenance operations such as load balancing. {{}} - -{{}} -The master server is also typically referred as just **master**. -{{}} - -## MVCC - -MVCC stands for Multi-version Concurrency Control. It is a concurrency control method used by YugabyteDB to provide access to data in a way that allows concurrent queries and updates without causing conflicts. {{}} - -## Namespace - -A namespace refers to a logical grouping or container for related database objects, such as tables, views, indexes, and other database constructs. Namespaces help organize and separate these objects, preventing naming conflicts and providing a way to control access and permissions. - -A namespace in YSQL is referred to as a database and is logically identical to a namespace in other RDBMS (such as PostgreSQL). - - A namespace in YCQL is referred to as a keyspace and is logically identical to a keyspace in Apache Cassandra's CQL. - -## Node - -A node is a virtual machine, physical machine, or container on which YugabyteDB is deployed. - -## OID - -Object Identifier (OID) is a unique identifier assigned to each database object, such as tables, indexes, views, functions, and other system objects. They are assigned automatically and sequentially by the system when new objects are created. - -While OIDs are an integral part of PostgreSQL's internal architecture, they are not always visible or exposed to users. In most cases, users interact with database objects using their names rather than their OIDs. However, there are cases where OIDs become relevant, such as when querying system catalogs or when dealing with low-level database operations. - -{{}} -OIDs are unique only in the context of a specific universe and are not guaranteed to be unique across different universes. 
-{{}} - -## Preferred region - -By default, YugabyteDB distributes client requests equally across the regions in a cluster. If application reads and writes are known to be originating primarily from a single region, you can designate a preferred region, which pins the [tablet leaders](#tablet-leader) to that single region. As a result, the preferred region handles all read and write requests from clients. Non-preferred regions are used only for hosting tablet follower replicas. - -Designating one region as preferred can reduce the number of network hops needed to process requests. For lower latencies and best performance, set the region closest to your application as preferred. If your application uses a [smart driver](#smart-driver), you can set the topology keys to target the preferred region. This means that the smart driver will distribute connections uniformly among the nodes in the preferred region, further optimizing performance. - -Regardless of the preferred region setting, data is replicated across all the regions in the cluster to ensure region-level fault tolerance. - -You can enable [follower reads](#follower-reads) to serve reads from non-preferred regions. In cases where the cluster has [read replicas](#read-replica-cluster) and a client connects to a read replica, reads are served from the replica; writes continue to be handled by the preferred region. {{}} - -## Primary cluster - -A primary cluster can perform both writes and reads, unlike a [read replica cluster](#read-replica-cluster), which can only serve reads. A [universe](#universe) can have only one primary cluster. Replication between [nodes](#node) in a primary cluster is performed synchronously. - -## Raft - -Raft stands for Replication for availability and fault tolerance. This is the algorithm that YugabyteDB uses for replication guaranteeing consistency. {{}} - -## Read replica cluster - -Read replica clusters are optional clusters that can be set up in conjunction with a [primary cluster](#primary-cluster) to perform only reads; writes sent to read replica clusters get automatically rerouted to the primary cluster of the [universe](#universe). These clusters enable reads in regions that are far away from the primary cluster with timeline-consistent data. This ensures low latency reads for geo-distributed applications. - -Data is brought into the read replica clusters through asynchronous replication from the primary cluster. In other words, [nodes](#node) in a read replica cluster act as Raft observers that do not participate in the write path involving the Raft leader and Raft followers present in the primary cluster. Reading from read replicas requires enabling [follower reads](#follower-reads). {{}} - -## Rebalancing - -Rebalancing is the process of keeping an even distribution of tablets across the [nodes](#node) in a cluster. {{}} - -## Region - -A region refers to a defined geographical area or location where a cloud provider's data centers and infrastructure are physically located. Typically a region consists of one or more [zones](#zone). Examples of regions include `us-east-1` (Northern Virginia), `eu-west-1` (Ireland), and `us-central1` (Iowa). - -## Replication factor (RF) - -The number of copies of data in a YugabyteDB universe. YugabyteDB replicates data across [fault domains](#fault-domain) (for example, zones) in order to tolerate faults. [Fault tolerance](#fault-tolerance) (FT) and RF are correlated. To achieve a FT of k nodes, the universe has to be configured with a RF of (2k + 1). 
- -The RF should be an odd number to ensure majority consensus can be established during failures. {{}} - -Each [read replica](#read-replica-cluster) cluster can also have its own replication factor. In this case, the replication factor determines how many copies of your primary data the read replica has; multiple copies ensure the availability of the replica in case of a node outage. Replicas *do not* participate in the primary cluster Raft consensus, and do not affect the fault tolerance of the primary cluster or contribute to failover. - -## Sharding - -Sharding is the process of mapping a table row to a [tablet](#tablet). YugabyteDB supports 2 types of sharding, Hash and Range. {{}} - -## Smart driver - -A smart driver in the context of YugabyteDB is essentially a PostgreSQL driver with additional "smart" features that leverage the distributed nature of YugabyteDB. These smart drivers intelligently distribute application connections across the nodes and regions of a YugabyteDB cluster, eliminating the need for external load balancers. This results in balanced connections that provide lower latencies and prevent hot nodes. For geographically-distributed applications, the driver can seamlessly connect to the geographically nearest regions and availability zones for lower latency. - -Smart drivers are optimized for use with a distributed SQL database, and are both cluster-aware and topology-aware. They keep track of the members of the cluster as well as their locations. As nodes are added or removed from clusters, the driver updates its membership and topology information. The drivers read the database cluster topology from the metadata table, and route new connections to individual instance endpoints without relying on high-level cluster endpoints. The smart drivers are also capable of load balancing read-only connections across the available YB-TServers. -. {{}} - -## Tablet - -YugabyteDB splits a table into multiple small pieces called tablets for data distribution. The word "tablet" finds its origins in ancient history, when civilizations utilized flat slabs made of clay or stone as surfaces for writing and maintaining records. {{}} - -{{}} -Tablets are also referred as shards. -{{}} - -## Tablet follower - -See [Tablet leader](#tablet-leader). - -## Tablet leader - -In a cluster, each [tablet](#tablet) is replicated as per the [replication factor](#replication-factor-rf) for high availability. Amongst these tablet replicas one tablet is elected as the leader and is responsible for handling writes and consistent reads. The other replicas are called followers. - -## Tablet splitting - -When a tablet reaches a threshold size, it splits into 2 new [tablets](#tablet). This is a very quick operation. {{}} - -## Transaction - -A transaction is a sequence of operations performed as a single logical unit of work. YugabyteDB provides [ACID](#acid) guarantees for transactions. {{}} - -## TServer - -The [YB-TServer](../yb-tserver) service is responsible for maintaining and managing table data in the form of tablets, as well as dealing with all the queries. {{}} - -## Universe - -A YugabyteDB universe comprises one [primary cluster](#primary-cluster) and zero or more [read replica clusters](#read-replica-cluster) that collectively function as a resilient and scalable distributed database. - -{{}} -Sometimes the terms *universe* and *cluster* are used interchangeably. The two are not always equivalent, as a universe can contain one or more [clusters](#cluster). 
-{{}} - -## xCluster - -xCluster is a type of deployment where data is replicated asynchronously between two [universes](#universe) - a primary and a standby. The standby can be used for disaster recovery. YugabyteDB supports transactional xCluster {{}}. - -## YCQL - -Semi-relational SQL API that is best fit for internet-scale OLTP and HTAP apps needing massive write scalability as well as blazing-fast queries. It supports distributed transactions, strongly consistent secondary indexes, and a native JSON column type. YCQL has its roots in the Cassandra Query Language. {{}} - -## YQL - -The YugabyteDB Query Layer (YQL) is the primary layer that provides interfaces for applications to interact with using client drivers. This layer deals with the API-specific aspects such as query/command compilation and the run-time (data type representations, built-in operations, and more). {{}} - -## YSQL - -Fully-relational SQL API that is wire compatible with the SQL language in PostgreSQL. It is best fit for RDBMS workloads that need horizontal write scalability and global data distribution while also using relational modeling features such as JOINs, distributed transactions, and referential integrity (such as foreign keys). Note that YSQL reuses the native query layer of the PostgreSQL open source project. {{}} - -## Zone - -Typically referred as Availability Zones or just AZ, a zone is a datacenter or a group of colocated datacenters. Zone is the default [fault domain](#fault-domain) in YugabyteDB. diff --git a/docs/content/preview/architecture/query-layer/_index.md b/docs/content/preview/architecture/query-layer/_index.md deleted file mode 100644 index ba6852705c86..000000000000 --- a/docs/content/preview/architecture/query-layer/_index.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: YugabyteDB Query Layer (YQL) -headerTitle: Query layer -linkTitle: YQL - Query layer -description: Understand how a query is processed -aliases: - - /preview/architecture/query-layer/overview/ -menu: - preview: - identifier: architecture-query-layer - parent: architecture - weight: 500 -showRightNav: true -type: indexpage ---- - - -The YugabyteDB Query Layer (YQL) is the primary layer that provides interfaces for applications to interact with using client drivers. This layer deals with the API-specific aspects such as query and command compilation, as well as the runtime functions such as data type representations, built-in operations, and so on. From the application perspective, YQL is stateless and the clients can connect to one or more YB-TServers on the appropriate port to perform operations against a YugabyteDB cluster. - -![Query layer](/images/architecture/query_layer.png) - -Although YQL is designed with extensibility in mind, allowing for new APIs to be added, it currently supports two types of distributed SQL APIs: [YSQL](../../api/ysql/) and [YCQL](../../api/ycql/). - -- [YSQL](../../api/ysql/) is a distributed SQL API that is built by reusing the PostgreSQL language layer code. It is a stateless SQL query engine that is wire-format compatible with PostgreSQL. The default port for YSQL is 5433. -- [YCQL](../../api/ycql/) is a semi-relational language that has its roots in Cassandra Query Language. It is a SQL-like language built specifically to be aware of the clustering of data across nodes. The default port for YCQL is 9042. - -## Query processing - -The primary function of the query layer is to process the queries sent by an application. 
The YQL processes the queries sent by an application in phases via four internal components. - -{{}} -It's important to note that you don't need to worry about these internal processes. YugabyteDB automatically handles them when you submit a query. -{{}} - -### Parser - -The parser processes each query in several steps as follows: - -1. Checks the query: The parser first checks if the query is written correctly and follows the proper SQL syntax rules. If there are any syntax errors, it returns an error message. - -1. Builds a parse tree: If the query is written correctly, the parser builds a structured representation of the query, called a parse tree. This parse tree captures the different parts of the query and how they are related. - -1. Recognizes keywords and identifiers: To build the parse tree, the parser first identifies the different components of the query, such as keywords (like SELECT, FROM), table or column names, and other identifiers. - -1. Applies grammar rules: The parser then applies a set of predefined grammar rules to understand the structure and meaning of the query based on the identified components. - -1. Runs semantic analysis: After building the parse tree, the parser performs a semantic analysis to understand the detailed meaning of the query. It looks up information about the tables, columns, functions, and operators referenced in the query to ensure they exist and are being used correctly. - -1. Creates a query tree: The semantic analysis step creates a new data structure called the query tree, which represents the complete, semantically understood version of the query. - -The reason for separating the initial parsing and the semantic analysis is to allow certain types of queries (like transaction control commands) to be executed quickly without the need for a full semantic analysis. The query tree contains more detailed information about data types, functions, and expressions used in the query, making it easier for the system to execute the query correctly. - -### Analyzer - -The created query tree is then analyzed, rewritten, and transformed based on any rules stored in the system catalog. - -Views are realized during this phase. Whenever a query against a view (that is, a virtual table) is made, the original query is rewritten to a query that accesses the base tables given in the view definition instead. - -### Planner - -The YugabyteDB query planner plays a crucial role in efficiently executing SQL queries across multiple nodes. It extends the capabilities of the traditional single node query planner to handle distributed data and execution. - -The planner first analyzes different ways a query can be executed based on the available data and indexes. It considers various strategies like scanning tables sequentially or using indexes to quickly locate specific data. - -After determining the optimal plan, the planner generates a detailed execution plan with all the necessary steps, such as scanning tables, joining data, filtering rows, sorting, and computing expressions. - -The execution plan is then passed to the query executor component, which carries out the plan and returns the final query results. - -{{}} -To learn how the query planner decides the optimal path for query execution, see [Query Planner](./planner-optimizer/) -{{}} - -### Executor - -After the query planner determines the optimal execution plan, the executor runs the plan and retrieves the required data. 
The executor sends requests to the other YB-TServers that hold the data needed to perform sorts, joins, and aggregations, then evaluates qualifications, and finally returns the derived rows. - -The executor works in a step-by-step fashion, recursively processing the plan from top to bottom. Each node in the plan tree is responsible for fetching or computing rows of data as requested by its parent node. - -For example, if the top node is a "Merge Join" node, it first requests rows from its two child nodes (the left and right inputs to be joined). The executor recursively calls the child nodes to retrieve rows. - -A child node may be a "Sort" node, which requests rows from its child, sorts them, and returns the sorted rows. The bottom-most child could be a "Sequential Scan" node that reads rows directly from a table. - -As the executor requests rows from each node, that node fetches or computes the rows from its children, applies any filtering or data transformations specified in the query plan, and returns the requested rows up to its parent node. - -This process continues recursively until the top node has received all the rows it needs to produce the final result. For a SELECT query, these final rows are sent to the client. For data modification queries like INSERT, UPDATE, or DELETE, the rows are used to make the requested changes in the database tables. - -The executor is designed to efficiently pull rows through the pipeline defined by the plan tree, processing rows in batches where possible for better performance. - -### Optimizations - -- **Incremental sort**. If an intermediate query result is known to be sorted by one or more leading keys of a required sort ordering, the additional sorting can be done considering only the remaining keys, if the rows are sorted in batches that have equal leading keys. - -- **Memoize results**. When only a small percentage of rows is checked on the inner side of a nested-loop join, the executor memoizes the results for improving performance. - -- **Disk-based hash aggregation**. Hash-based operations are generally more sensitive to memory availability and are highly efficient as long as the hash table fits within the memory specified by the work_mem parameter. When the hash table grows beyond the `work_mem` limit, the planner transitions to a disk-based hash aggregation plan. This avoids overloading memory and ensures that large datasets can be handled efficiently. - -## Query ID - -In YSQL, to provide a consistent way to track and identify specific queries across different parts of the system such as logs, performance statistics, and EXPLAIN plans, a unique identifier is generated for each query processed. The query ID is effectively a hash value based on the normalized form of the SQL query. This normalization process removes insignificant whitespace and converts literal values to placeholders, ensuring that semantically identical queries have the same ID. This provides the following benefits: - -- By providing a unique identifier for each query, it becomes much easier to analyze query performance and identify problematic queries. -- Including query IDs in logs and performance statistics enables more detailed and accurate monitoring of database activity. -- The EXPLAIN command, which shows the execution plan for a query, can also display the query ID. This helps to link the execution plan with the actual query execution statistics. 
-- The pg_stat_statements extension (which is installed by default in YugabyteDB) can accurately track and report statistics even for queries with varying literal values (for example, different WHERE clause parameters). This makes it much easier to identify performance bottlenecks caused by specific query patterns. - -Generation of this unique query ID is controlled using the `compute_query_id` setting, which can have the following values: - -- on - Always compute query IDs. -- off - Never compute query IDs. -- auto (the default) - Automatically compute query IDs when needed, such as when pg_stat_statements is enabled (pg_stat_statements is enabled by default). - -You should enable `compute_query_id` to fully realize its benefits for monitoring and performance analysis. diff --git a/docs/content/preview/architecture/transactions/_index.md b/docs/content/preview/architecture/transactions/_index.md deleted file mode 100644 index bc7c5bd3b2d8..000000000000 --- a/docs/content/preview/architecture/transactions/_index.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -title: DocDB transactions layer -headerTitle: DocDB transactions layer -linkTitle: Transactions -description: DocDB transactions layer -headcontent: Understand how distributed transactions work -aliases: - - /architecture/transactions/ -menu: - preview: - identifier: architecture-acid-transactions - parent: architecture - weight: 900 -type: indexpage ---- - -Transactions and strong consistency are a fundamental requirement for any RDBMS. YugabyteDB's distributed transaction architecture supports fully distributed [ACID](../key-concepts#acid) transactions across rows, multiple tablets, and multiple nodes at any scale, and is inspired by [Google Spanner](https://research.google.com/archive/spanner-osdi2012.pdf). Transactions can span across tables in DocDB. - -## Fundamentals - -One of the basic challenges in a distributed system is how to manage the time disparity between different machines. This is very critical for distributed transactions and multi version concurrency control. YugabyteDB uses Hybrid Logical clocks to generate a monotonically increasing timestamp. - -{{}} -To learn about more about Hybrid Time and MVCC, see [Transaction fundamentals](transactions-overview/). -{{}} - -## Distributed transactions - -Ensuring the [ACID](../key-concepts/#acid) guarantees in a distributed database is a challenge. There are multiple components involved and yet the transaction has to be successfully executed even in the case of component failures. - -{{}} -To understand how failures are handled during transactions, see [Distributed transactions](distributed-txns/). -{{}} - -## Transaction execution - -There are multiple components and stages involved in the execution of a distributed transaction from start to commit. A transaction manager co-ordinates the transaction and finally commits or aborts the transaction as needed, the transaction status tablet maintains the status, the provisional database stores the temporary records. - -{{}} -To understand how a transaction is executed, see [Transactional I/O path](transactional-io-path/). -{{}} - -## Single-row transactions - -In cases where keys involved in the transaction are located in the same tablet, YugabyteDB has optimizations to execute the transaction much faster. The transaction manager of YugabyteDB automatically detects transactions that update a single row (as opposed to transactions that update rows across tablets or nodes). 
In order to achieve high performance, such single-row updates are applied directly to the row, without having to interact with the transaction status tablet, using the single-row transaction path (also known as the fast path).

{{}}
To learn more about single-row and single-shard transactions, see [Single-row transactions](single-row-transactions/).
{{}}

## Isolation levels

Isolation levels in databases refer to the degree of isolation between concurrent transactions, which determines how much each transaction is affected by the actions of others. YugabyteDB provides three isolation levels - Snapshot, Serializable, and Read Committed - with the same isolation guarantees as PostgreSQL's `REPEATABLE READ`, `SERIALIZABLE`, and `READ COMMITTED`, respectively. Understanding and properly configuring the isolation levels in a database is crucial for ensuring data consistency and optimizing the performance of concurrent transactions.

{{}}
To understand the different isolation levels and how they work, see [Transaction isolation levels](isolation-levels/).
{{}}

## Concurrency control

Concurrency control is a key mechanism in databases that ensures the correct and consistent execution of concurrent transactions. It is responsible for managing and coordinating multiple simultaneous accesses to the same data to detect conflicts, maintain data integrity, and prevent anomalies. YugabyteDB uses two strategies for concurrency control: Fail-on-conflict and Wait-on-conflict.

{{}}
To learn how YugabyteDB handles conflicts between concurrent transactions, see [Concurrency control](concurrency-control/).
{{}}

## Explicit locking

As with PostgreSQL, YugabyteDB provides various row-level lock modes to control concurrent access to data in tables. These modes can be used for application-controlled locking in cases where MVCC does not provide the desired behavior.

{{}}
To learn about the different locking mechanisms, see [Explicit locking](../../explore/transactions/explicit-locking).
{{}}

## Transaction priorities

Transaction priorities in databases refer to the order in which transactions are executed when there are conflicting operations or resource contention. The priorities are used to determine which transaction should be given preference when resolving conflicts; some transactions may be aborted.

{{}}
To learn how YugabyteDB decides which transactions should be aborted in case of conflict, see [Transaction priorities](transaction-priorities/).
{{}}

## Read committed

Read Committed is the isolation level in which clients do not need to retry or handle serialization errors (40001) in application logic.

{{}}
To understand how Read Committed is implemented and how to use it, see [Read committed](read-committed/).
{{}}

## Read restart error

Read restart errors, also known as read skew or read consistency errors, are a type of concurrency control issue that can occur when using certain isolation levels. Although YugabyteDB has optimizations to resolve most scenarios automatically, depending on the level of clock skew, it can throw this error.

{{}}
To understand when this error could be thrown, see [Read restart error](read-restart-error/).
{{}}
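For example, the following minimal YSQL sketch shows a transaction run at an explicit isolation level combined with an application-controlled row lock (the `accounts` table and its columns are hypothetical and used only for illustration):

```sql
-- Run the transaction at a chosen isolation level (Serializable shown here).
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;

-- Explicit row-level lock, for cases where MVCC alone does not
-- provide the desired behavior.
SELECT balance FROM accounts WHERE id = 1 FOR UPDATE;

UPDATE accounts SET balance = balance - 100 WHERE id = 1;

COMMIT;
```

If such a transaction conflicts with a concurrent one, it can fail with a serialization error (40001) and be retried by the application; under Read Committed, as noted above, these errors are handled internally.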
diff --git a/docs/content/preview/architecture/transactions/isolation-levels.md b/docs/content/preview/architecture/transactions/isolation-levels.md deleted file mode 100644 index c5b25a084490..000000000000 --- a/docs/content/preview/architecture/transactions/isolation-levels.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -title: Transaction isolation levels -headerTitle: Transaction isolation levels -linkTitle: Isolation levels -description: Learn how YugabyteDB supports two transaction isolation levels Snapshot Isolation and Serializable. -menu: - preview: - identifier: architecture-isolation-levels - parent: architecture-acid-transactions - weight: 500 -type: docs ---- - -Transaction isolation is foundational to handling concurrent transactions in databases. The SQL-92 standard defines four levels of transaction isolation (in decreasing order of strictness): Serializable, Repeatable Read, Read Committed, and Read Uncommitted. - -YugabyteDB supports the following three strictest transaction isolation levels: - -1. Read Committed, which maps to the SQL isolation level of the same name. This isolation level guarantees that each statement sees all data that has been committed before it is issued (this implicitly also means that the statement sees a consistent snapshot). In addition, this isolation level internally handles read restart and conflict errors. In other words, the client does not see read restart and conflict errors (barring an exception). -2. Serializable, which maps to the SQL isolation level of the same name. This isolation level guarantees that transactions run in a way equivalent to a serial (sequential) schedule. -3. Snapshot, which maps to the SQL Repeatable Read isolation level. This isolation level guarantees that all reads made in a transaction see a consistent snapshot of the database, and the transaction itself can successfully commit only if no updates it has made conflict with any concurrent updates made by transactions that committed after that snapshot. - -Transaction isolation level support differs between the YSQL and YCQL APIs: - -- [YSQL](../../../api/ysql/) supports Serializable, Snapshot, and Read Committed isolation levels. -- [YCQL](../../../api/ycql/dml_transaction/) supports only Snapshot isolation using the `BEGIN TRANSACTION` syntax. - -Similarly to PostgreSQL, you can specify Read Uncommitted for YSQL, but it behaves the same as Read Committed. - -Read Committed is supported only if the YB-TServer flag `yb_enable_read_committed_isolation` is set to `true`. By default, this flag is `false`, in which case the Read Committed isolation level of YugabyteDB's transactional layer falls back to the stricter Snapshot isolation. The default isolation level for the YSQL API is essentially Snapshot because Read Committed, which is the YSQL API and PostgreSQL syntactic default, maps to Snapshot isolation. - -## Internal locking in DocDB - -In order to support the three isolation levels, the lock manager internally supports the following three types of locks: - -- Serializable read lock is taken by serializable transactions on values that they read in order to guarantee they are not modified until the transaction commits. - -- Serializable write lock is taken by serializable transactions on values they write. - -- Snapshot isolation write lock is taken by a snapshot isolation (and also read committed) transaction on values that it modifies. 
The following matrix shows conflicts between these types of locks at a high level:

| | Snapshot isolation write | Serializable write | Serializable read |
| --- | --- | --- | --- |
| **Snapshot isolation write** | ✘ Conflict | ✘ Conflict | ✘ Conflict |
| **Serializable write** | ✘ Conflict | ✔ No conflict | ✘ Conflict |
| **Serializable read** | ✘ Conflict | ✘ Conflict | ✔ No conflict |
That is, serializable read locks block writers but allow other simultaneous readers. Serializable write locks block readers as expected but not other serializable writers. Finally, snapshot isolation write locks block all other readers and writers.

Because serializable write locks do not block other serializable writers, concurrent blind writes are allowed at the serializable isolation level. A blind write is a write to a location that has not been previously read by that transaction. Two serializable transactions blindly writing to the same location can proceed in parallel assuming there are no other conflicts; the value of the location afterwards will be the value written by the transaction that committed last.

Although described here as a separate lock type for simplicity, the snapshot isolation write lock type is actually implemented internally as a combination of the other two lock types. That is, taking a single snapshot isolation write lock is equivalent to taking both a serializable read lock and a serializable write lock.

## Locking granularities

Locks can be taken at many levels of granularity. For example, a serializable read lock could be taken at the level of an entire tablet, a single row, or a single column of a single row. Such a lock will block attempts to take write locks at that or finer granularities. Thus, for example, a read lock taken at the row level will block attempts to write to that entire row or any column in that row.

In addition to the above-mentioned levels of granularity, locks in DocDB can be taken at prefixes of the primary key columns, treating the hash columns as a single unit. For example, if you created a YSQL table via:

```sql
CREATE TABLE test (h1 INT, h2 INT, r1 INT, r2 INT, v INT, w INT, PRIMARY KEY ((h1, h2) HASH, r1 ASC, r2 ASC));
```

then any of the following objects could be locked:

- the entire tablet
- all rows having h1=2, h2=3
- all rows having h1=2, h2=3, r1=4
- the row having h1=2, h2=3, r1=4, r2=5
- column v of the row having h1=2, h2=3, r1=4, r2=5

With YCQL, granularities exist below the column level; for example, a single key of a column of map data type can be locked.

## Efficiently detecting conflicts between locks of different granularities

The straightforward way to handle locks of different granularities would be to have a map from lockable objects to lock types. However, this is too inefficient for detecting conflicts: attempting, for example, to add a lock at the tablet level would require checking for locks at every row and column in that tablet.

To make conflict detection efficient, YugabyteDB stores extra information for each lockable object about any locks on sub-objects of it. In particular, instead of just taking a lock on _X_, it takes a normal lock on _X_ and also weaker versions of that lock on all objects that enclose _X_. The normal locks are called _strong_ locks and the weaker variants _weak_ locks.

As an example, pretend YugabyteDB has only tablet- and row-level granularities. To take a serializable write lock at the row level (say on row _r_ of tablet _b_), it would take a strong write lock at the row level (on _r_) and a weak write lock at the tablet level (on _b_). To take a serializable read lock at the tablet level (assume also on _b_), YugabyteDB would just take a strong read lock at the tablet level (on _b_).
- -Using the following conflict rules, YugabyteDB can decide if two original locks would conflict based only on whether or not their strong/weak locks at any lockable object would conflict: - -- two strong locks conflict if and only if they conflict ignoring their strength - - for example, serializable write conflicts with serializable read per the previous matrix -- two weak locks never conflict -- a strong lock conflicts with a weak lock if and only if they conflict ignoring their strength - -That is, for each lockable object that would have two locks, would they conflict under the above rules? There is no need to enumerate the sub-objects of any object. - -Consider our example with a serializable write lock at the row level and a serializable read lock at the tablet level. A conflict is detected at the tablet level because the strong read and the weak write locks on _b_ conflict because ordinary read and write locks conflict. - -What about a case involving two row-level snapshot isolation write locks on different rows in the same tablet? No conflict is detected because the tablet-level locks are weak and the strong row-level locks are on different rows. If they had involved the same row then a conflict would be detected because two strong snapshot isolation write locks conflict. - -Including the strong/weak distinction, the full conflict matrix becomes: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| | Strong Snapshot isolation write | Weak Snapshot isolation write | Strong Serializable write | Weak Serializable write | Strong Serializable read | Weak Serializable read |
| --- | --- | --- | --- | --- | --- | --- |
| **Strong Snapshot isolation write** | ✘ Conflict | ✘ Conflict | ✘ Conflict | ✘ Conflict | ✘ Conflict | ✘ Conflict |
| **Weak Snapshot isolation write** | ✘ Conflict | ✔ No conflict | ✘ Conflict | ✔ No conflict | ✘ Conflict | ✔ No conflict |
| **Strong Serializable write** | ✘ Conflict | ✘ Conflict | ✔ No conflict | ✔ No conflict | ✘ Conflict | ✘ Conflict |
| **Weak Serializable write** | ✘ Conflict | ✔ No conflict | ✔ No conflict | ✔ No conflict | ✘ Conflict | ✔ No conflict |
| **Strong Serializable read** | ✘ Conflict | ✘ Conflict | ✘ Conflict | ✘ Conflict | ✔ No conflict | ✔ No conflict |
| **Weak Serializable read** | ✘ Conflict | ✔ No conflict | ✘ Conflict | ✔ No conflict | ✔ No conflict | ✔ No conflict |
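As a concrete illustration of one cell in this matrix, consider the following two-session YSQL sketch against the `test` table defined above (the literal values are arbitrary). The serializable read locks taken by session 1 conflict with the snapshot isolation write locks taken by session 2 on the same row, so the second transaction must wait or abort, depending on the concurrency control strategy in use:

```sql
-- Session 1: a serializable transaction reads a row, taking serializable
-- read locks on the values it reads.
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
SELECT v FROM test WHERE h1 = 2 AND h2 = 3 AND r1 = 4 AND r2 = 5;

-- Session 2: a snapshot (REPEATABLE READ) transaction updates the same row,
-- taking snapshot isolation write locks, which conflict with the read locks above.
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
UPDATE test SET v = 10 WHERE h1 = 2 AND h2 = 3 AND r1 = 4 AND r2 = 5;
```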
diff --git a/docs/content/preview/architecture/yb-master.md b/docs/content/preview/architecture/yb-master.md deleted file mode 100644 index 1e9e646af715..000000000000 --- a/docs/content/preview/architecture/yb-master.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: YB-Master service -headerTitle: YB-Master service -linkTitle: YB-Master -description: Learn how the YB-Master service manages tablet metadata and coordinates cluster configuration changes. -headcontent: Catalog information, tablet metadata, and cluster coordination -aliases: - - /preview/architecture/concepts/yb-master/ -menu: - preview: - identifier: architecture-concepts-yb-master - parent: architecture - weight: 1100 -type: docs ---- - -The YB-Master service keeps the system metadata and records, such as tables and the location of their tablets, users and roles with their associated permissions, and so on. - -The YB-Master service is also responsible for coordinating background operations, such as load-balancing or initiating replication of under-replicated data, as well as performing a variety of administrative operations such as creating, altering, and dropping tables. - -The YB-Master is [highly available](#high-availability), as it forms a Raft group with its peers, and it is not in the critical path of I/O against user tables. - -![master_overview](/images/architecture/master_overview.png) - -The YB-Master performs a number of important operations within the system. Some operations are performed throughout the lifetime of the universe, in the background, without impacting foreground read and write performance. - -## Administrative operations - -When one of these universe-wide operations is initiated, such as creating a new table, modifying an existing table, dropping (deleting) a table, or creating backups, the YB-Master ensures that the operation is successfully propagated and applied to all relevant tablets, regardless of the current state of the YB-TServer nodes hosting those tablets. - -This guarantee is crucial because if a YB-TServer fails while such an operation is in progress, it cannot cause the operation to be only partially applied, leaving the database in an inconsistent state. The YB-Master makes sure the operation is either fully applied everywhere or not applied at all, maintaining data integrity. - -## System metadata - -The YB-Master stores important system-wide metadata, which includes information about: - -- Namespaces (database names) -- Table information -- User roles and permissions - -This system metadata is crucial for managing and coordinating the entire YugabyteDB cluster. The YB-Master stores this system metadata in an internal table. This allows the metadata to be managed and accessed like any other table in the database. - -To ensure redundancy and prevent data loss, the system metadata is replicated across all YB-Master nodes using a replication protocol called Raft. This means that if one YB-Master fails, the others will still have the up-to-date system metadata. - -## Table creation - -The YB-Master leader validates the table schema and decides the desired number of tablets for the table and creates metadata for each of them. The table schema and the tablet metadata information is replicated to YB-Master Raft group. This ensures that the table creation can succeed even if the current YB-Master leader fails. After this, as the operation is asynchronous and can proceed even if the current YB-Master leader fails, the table creation API returns a success. 
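For example, assuming YSQL's table-creation syntax for specifying a tablet count (the `orders` table and the `SPLIT INTO` clause below are illustrative and not taken from this page), the desired number of tablets can be requested explicitly at creation time, and the YB-Master leader creates and replicates the metadata for each of them:

```sql
-- Illustrative only: request 8 tablets for a hash-sharded table.
CREATE TABLE orders (
    order_id BIGINT,
    user_id  BIGINT,
    total    NUMERIC,
    PRIMARY KEY (order_id HASH)
) SPLIT INTO 8 TABLETS;
```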
- -## Tablet assignments - -The YB-Master component in YugabyteDB keeps track of all the tablets (data shards) and the YB-TServer nodes that are currently hosting them. It maintains a mapping of which tablets are stored on which YB-TServer nodes. - -When clients, such as the YugabyteDB query layer or applications using the YCQL (Cassandra-compatible) or YSQL (PostgreSQL-compatible) APIs, need to retrieve data, they can efficiently query the YB-Master to get this tablet-to-node mapping. The smart clients then cache (store) this mapping locally. - -By having the tablet-to-node mapping cached, the smart clients can communicate directly with the correct YB-TServer node that holds the required data, without needing to go through additional network hops or intermediate components. This direct communication allows for efficient data retrieval and query processing. - -## Load balancing - -The YB-Master leader places (at `CREATE TABLE` time) the tablets across YB-TServers to enforce any user-defined data placement constraints and ensure uniform load. In addition, during the lifetime of the universe, as nodes are added, fail, or become decommissioned, it continues to balance the load and enforce data placement constraints automatically. - -## Leader balancing - -Aside from ensuring that the number of tablets served by each YB-TServer is balanced across the universe, the YB-Masters also ensure that each node has a symmetric number of tablet leaders across nodes. This is also done for the followers. - -## Re-replication of data - -The YB-Master receives regular "heartbeat" signals from all the YB-TServer nodes in the cluster. These heartbeats allow the YB-Master to monitor the liveness (active state) of each YB-TServer. - -If the YB-Master detects that a YB-TServer has failed (stopped sending heartbeats), it keeps track of how long the node has been in a failed state. If this failure duration exceeds a predetermined threshold, the YB-Master initiates a process to replace the failed node. - -Specifically, the YB-Master identifies replacement YB-TServer nodes and re-replicates (copies) the tablet data from the failed node to the new nodes. This re-replication process ensures that the data remains available and redundant, even after a node failure. - -However, the YB-Master carefully throttles (limits) the rate of re-replication to avoid impacting the ongoing, regular operations of the database cluster. This throttling prevents the re-replication from overloading the system and affecting the performance of user queries and other foreground activities. - -## High availability - -The YB-Master is not in the critical path of normal I/O operations, therefore its failure does not affect a functioning universe. Nevertheless, the YB-Master is a part of a Raft group with the peers running on different nodes. The number of peers is decided by the [replication factor](../key-concepts/#replication-factor-rf) of the [universe](../key-concepts/#universe). One of these peers is the active master and the others are active standbys. If the active master (the YB-Master leader) fails, these peers detect the leader failure and re-elect a new YB-Master leader which becomes the active master in seconds of the failure. 
diff --git a/docs/content/preview/architecture/yb-tserver.md b/docs/content/preview/architecture/yb-tserver.md deleted file mode 100644 index 1a0196de436a..000000000000 --- a/docs/content/preview/architecture/yb-tserver.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -title: YB-TServer service -headerTitle: YB-TServer service -linkTitle: YB-TServer -description: Learn how the YB-TServer service stores and serves application data using tablets (also known as shards). -headcontent: Serve application data and manage tablets -aliases: - - /preview/architecture/concepts/yb-tserver/ -menu: - preview: - identifier: architecture-concepts-yb-tserver - parent: architecture - weight: 1200 -type: docs ---- - -The YugabyteDB Tablet Server (YB-TServer) service is responsible for the input-output (I/O) of the end-user requests in a YugabyteDB cluster. Data for a table is split (sharded) into tablets. Each tablet is composed of one or more tablet peers, depending on the replication factor. Each YB-TServer hosts one or more tablet peers. - -The following diagram depicts a basic four-node YugabyteDB universe, with one table that has 4 tablets and a replication factor of 3: - -![TServer overview](/images/architecture/tserver_overview.png) - -The tablet-peers corresponding to each tablet hosted on different YB-TServers form a Raft group and replicate data between each other. The system shown in the preceding diagram includes sixteen independent Raft groups. For more information, see [Replication layer](../docdb-replication/). - -In each YB-TServer, cross-tablet intelligence is employed to maximize resource efficiency. There are multiple ways the YB-TServer coordinates operations across the tablets it hosts. - -## Server-global block cache - -The block cache is shared across different tablets in a given YB-TServer, leading to highly efficient memory utilization in cases when one tablet is read more often than others. For example, if one table has a read-heavy usage pattern compared to others, the block cache would automatically favor blocks of this table, as the block cache is global across all tablet peers. - -## Space amplification - -YugabyteDB's compactions are size-tiered. Size-tier compactions have the advantage of lower disk write (I/O) amplification when compared to level compactions. There may be a concern that size-tiered compactions have a higher space amplification (that it needs 50% space headroom). This is not true in YugabyteDB because each table is broken into several tablets and concurrent compactions across tablets are throttled to a specific maximum. The typical space amplification in YugabyteDB tends to be in the 10-20% range. - -## Throttled compactions - -The compactions are throttled across tablets in a given YB-TServer to prevent compaction storms. This prevents, for example, high foreground latencies during a compaction storm. - -The default policy ensures that doing a compaction is worthwhile. The algorithm tries to make sure that the files being compacted are not too disparate in terms of size. For example, it does not make sense to compact a 100GB file with a 1GB file to produce a 101GB file, because it would require a lot of unnecessary I/O for little gain. - -## Compaction queues - -Compactions are prioritized into large and small compactions with some prioritization to keep the system functional even in extreme I/O patterns. - -In addition to throttling controls for compactions, YugabyteDB does a variety of internal optimizations to minimize impact of compactions on foreground latencies. 
For example, a prioritized queue to give priority to small compactions over large compactions to make sure the number of SSTable files for any tablet stays as low as possible. - -## Manual compactions - -YugabyteDB allows compactions to be externally triggered on a table using the [`compact_table`](../../admin/yb-admin/#compact-table) command in the [yb-admin utility](../../admin/yb-admin/). This is useful when new data is no longer coming into the system for a table and you might want to reclaim disk space due to overwrites or deletes that have already happened, or due to TTL expiry. - -## Statistics-based full compactions to improve read performance - -YugabyteDB tracks the number of key-value pairs that are read at the DocDB level over a sliding period (dictated by the [auto_compact_stat_window_seconds](../../reference/configuration/yb-tserver#auto-compact-stat-window-seconds) YB-TServer flag). If YugabyteDB detects an overwhelming amount of the DocDB reads in a tablet are skipping over tombstoned and obsolete keys, then a full compaction will be triggered to remove the unnecessary keys. - -Once all of the following conditions are met in the sliding window, full compaction is automatically triggered on the tablet: - -- The ratio of obsolete (for example, deleted or removed due to TTL) versus active keys read reaches the threshold [auto_compact_percent_obsolete](../../reference/configuration/yb-tserver/#auto-compact-percent-obsolete). - -- Enough keys have been read ([auto_compact_min_obsolete_keys_found](../../reference/configuration/yb-tserver/#auto-compact-min-obsolete-keys-found)). - -While this feature is compatible with tables with TTL, YugabyteDB won't schedule compactions on tables with TTL if the [TTL file expiration](../../develop/learn/ttl-data-expiration-ycql/#efficient-data-expiration-for-ttl) feature is active. - -## Scheduled full compactions - - YugabyteDB allows full compactions overall data in a tablet to be scheduled automatically using the [scheduled_full_compaction_frequency_hours](../../reference/configuration/yb-tserver#scheduled-full-compaction-frequency-hours) and [scheduled_full_compaction_jitter_factor_percentage](../../reference/configuration/yb-tserver#scheduled-full-compaction-jitter-factor-percentage) YB-TServer flags. This can be useful for performance and disk space reclamation for workloads with a large number of overwrites or deletes on a regular basis. This can be used with tables with TTL as well but is not compatible with the [TTL file expiration](../../develop/learn/ttl-data-expiration-ycql/#efficient-data-expiration-for-ttl) feature. - -## Server-global memstore limit - -Server-global memstore limit tracks and enforces a global size across the memstores for different tablets. This is useful when there is a skew in the write rate across tablets. For example, if there are tablets belonging to multiple tables in a single YB-TServer and one of the tables gets a lot more writes than the other tables, the write-heavy table is allowed to grow much larger than it could if there was a per-tablet memory limit. This allows for good write efficiency. - -## Auto-sizing of block cache and memstore - -The block cache and memstores represent some of the larger memory-consuming components. Since these are global across all the tablet peers, this makes memory management and sizing of these components across a variety of workloads easy. 
Based on the RAM available on the system, the YB-TServer automatically gives a certain percentage of the total available memory to the block cache and another percentage to memstores. - -## Distributing tablet load uniformly across data disks - -On multi-SSD machines, the data (SSTable) and WAL (Raft write-ahead log) for various tablets of tables are evenly distributed across the attached disks on a per-table basis. This load distribution (also known as striping), ensures that each disk handles an even amount of load for each table. - -## High availability - -The failure of a YB-TServer hosting follower tablets has no impact on the write path. If there are any leader tablets present in a failed YB-TServer, the raft group for that tablet [elects](../docdb-replication/raft#leader-election) a new leader on a different YB-TServer. The unavailability window is approximately 3 seconds (assuming the default heartbeat interval of 500 ms) in the event of a failure of the tablet peer leader. diff --git a/docs/content/preview/benchmark/_index.md b/docs/content/preview/benchmark/_index.md deleted file mode 100644 index 9854e35cb27f..000000000000 --- a/docs/content/preview/benchmark/_index.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: Benchmark YugabyteDB -headerTitle: Benchmark YugabyteDB -linkTitle: Benchmark -description: Benchmark YugabyteDB using TPC-C, sysbench, YCSB and more. -aliases: - - /preview/benchmark/performance/ -type: indexpage ---- - -YugabyteDB is designed to provide high availability, scalability, and fault tolerance while providing simple interfaces via YSQL and YCQL APIs. However, to assess its true capabilities and to showcase its potential to handle real-world workloads, rigorous benchmarking is essential. - -Benchmarking is the process of evaluating the performance and capabilities of a system under specific workloads to gain insights into its scalability, resilience, and overall efficiency. This process involves simulating real-world usage scenarios using standardized workloads to understand how well the system performs, scales, and recovers from failures. It is crucial to understand the ability of YugabyteDB to handle various workloads, such as the TPC-C, YCSB, and sysbench benchmarks, which represent different aspects of a distributed database's performance. - -## TPC-C (Transaction Processing Performance Council - Benchmark C) - -[TPC-C](http://www.tpc.org/tpcc/) is a widely recognized benchmark for testing the performance of transactional database systems. It simulates a complex OLTP (Online Transaction Processing) workload that involves a mix of different transactions like order creation, payment processing, and stock level checking. Benchmarking YugabyteDB using TPC-C helps assess its ability to handle a high volume of concurrent transactions and maintain consistency and integrity. - -{{}} -To test performance for concurrent transactions with TPC-C, see [TPC-C](tpcc/). -{{}} - -## YCSB (Yahoo Cloud Serving Benchmark) - -[YCSB](https://github.com/brianfrankcooper/YCSB/wiki) is designed to evaluate the performance of databases under various read and write workloads, ranging from mostly read-heavy to write-heavy. Using YCSB, you can assess how well YugabyteDB handles different data access patterns and query loads, which is crucial for applications with diverse usage requirements. - -{{}} -To test performance using the Yahoo Cloud Serving Benchmark, see [YCSB](ycsb-ysql/). 
-{{}} - -## sysbench - -[sysbench](https://github.com/akopytov/sysbench) is a versatile benchmarking tool that covers a wide range of database workloads, including CPU, memory, disk I/O, and database operations. It helps measure the system's performance, stability, and scalability under different stress conditions, enabling you to identify potential bottlenecks and weaknesses. - -{{}} -To test performance using sysbench, see [Sysbench](sysbench-ysql/). -{{}} - -## Learn More - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/benchmark/resilience/_index.md b/docs/content/preview/benchmark/resilience/_index.md deleted file mode 100644 index 749875801389..000000000000 --- a/docs/content/preview/benchmark/resilience/_index.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: Benchmark resilience (fault tolerance) -headerTitle: Resilience -linkTitle: Resilience -description: Benchmark YugabyteDB's ability to withstand component failure. -headcontent: Benchmarking the ability to withstand component failure -menu: - preview: - identifier: resilience - parent: benchmark - weight: 21 -type: indexpage ---- - -Resiliency refers to the ability of a system to withstand and recover from failures or disruptions, whether they are caused by software bugs, hardware issues, network problems, or external events. A resilient system is designed to absorb the impact of failures and continue operating, even if at a degraded level, without experiencing a complete outage. - -In YugabyteDB, resiliency is achieved through various techniques, including the following: - -- **Fault tolerance**. Replicating tablets on to multiple nodes with one acting as a leader and others as followers. If the leader fails, a new leader is automatically elected, ensuring continuous availability. This replication and fault-tolerant architecture allows the database to withstand the failure of individual nodes, or even entire datacenters without losing data or service availability. -- **Consistency guarantees**. Raft-based consensus ensures full ACID (Atomicity, Consistency, Isolation, Durability) transactions, even across multiple tablets and datacenters. This consistency model helps maintain data integrity and coherence, even in the face of failures or network partitions. -- **Self healing**. YugabyteDB automatically detects and recovers from failures, such as node crashes, disk failures, or network partitions. It can automatically repair and rebalance the cluster by re-replicating data and redistributing tablet leaders to maintain optimal performance and resilience. -- **Elasticity**. YugabyteDB can dynamically adjust the number of replicas and the distribution of data across the cluster, ensuring that the system can handle changes in load and resource requirements. This scalability and elasticity help maintain the overall resilience and availability of the database, even as the workload and infrastructure requirements change over time. -- **Backup and disaster recovery (DR)**. YugabyteDB provides built-in backup and DR capabilities, allowing you to create consistent snapshots of the data and restore it in the event of a major failure or disaster. These backup and DR features help ensure the long-term resilience and recoverability of the database, even in the face of large-scale failures or catastrophic events. 
- -## Jepsen test - -[Jepsen](https://jepsen.io/) testing is a methodology and set of tools used to rigorously test the fault tolerance and correctness of distributed systems, particularly databases and other data storage systems. Jepsen deliberately injects faults into the system, such as network partitions, process crashes, disk failures, and other types of failures. - -Jepsen employs a rigorous verification process, which includes generating complex, realistic workloads, carefully monitoring the system's behavior, and analyzing the results to identify any inconsistencies or violations of the specified properties. - -YugabyteDB passes 99.9% of the Jepsen tests. - -{{}} -For more details, see [Jepsen test results](jepsen-testing/). -{{}} - -## Learn more - -- [Resiliency, high availability, and fault tolerance](../../explore/fault-tolerance/) diff --git a/docs/content/preview/benchmark/scalability/_index.md b/docs/content/preview/benchmark/scalability/_index.md deleted file mode 100644 index 5d6728f8e7a0..000000000000 --- a/docs/content/preview/benchmark/scalability/_index.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: Benchmark scalability -headerTitle: Scalability -linkTitle: Scalability -description: Benchmark scalability of queries and datasets in YugabyteDB. -headcontent: Benchmark adjusting capacity to meet demand -menu: - preview: - identifier: scalability - parent: benchmark - weight: 20 -type: indexpage ---- - diff --git a/docs/content/preview/benchmark/tpcc/_index.md b/docs/content/preview/benchmark/tpcc/_index.md deleted file mode 100644 index 20ed8e5f09ba..000000000000 --- a/docs/content/preview/benchmark/tpcc/_index.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: TPC-C Benchmark on YugabyteDB -headerTitle: TPC-C Benchmark on YugabyteDB -linkTitle: TPC-C -description: Benchmark YugabyteDB using TPC-C. -aliases: - - /benchmark/tpcc - - /benchmark/tpcc-ysql - - /preview/benchmark/tpcc-ysql/ -menu: - preview: - identifier: tpcc - parent: benchmark - weight: 4 -type: indexpage ---- - -[TPC-C](http://www.tpc.org/tpcc/) is a popular online transaction processing benchmark that provides metrics you can use to evaluate the performance of YugabyteDB for concurrent transactions of different types and complexity, and which are either executed online or queued for deferred execution. Developed by the Transaction Processing Performance Council (TPC), it simulates a complete computing environment where a population of users execute transactions against a database. - -{{}} -All benchmarks were run on a single-region YugabyteDB cluster running on {{}}, except 150K warehouses, which was run on [v2.11](/preview/releases/ybdb-releases/end-of-life/v2.11/). -{{}} - -## Running the benchmark - -Conducting an accurate TPC-C benchmark requires aligning your test environment with your production landscape. Begin by assessing your anticipated workload in terms of IOPS and projected data volume. These estimates will guide you in selecting an appropriate cluster configuration that closely mirrors your operational requirements. - -After you've identified a cluster specification that matches your needs, apply the TPC-C workload recommended for that particular setup. The goal is to validate that the cluster can sustain the expected transaction throughput—measured in tpmC with a high degree of efficiency, typically exceeding 99.5%. 
This high-efficiency rate ensures that the cluster meets the benchmark's demands with minimal resource overhead, indicating its readiness to handle your real-world, high-volume transactional workloads. - -{{}} -For information on cluster specification/workload and how to run the TPC-C against a local or a YugabyteDB Aeon cluster, see [Running TPC-C](running-tpcc/). -{{}} - -## Scale out - -YugabyteDB exhibits exemplary scalability under the TPC-C workload, demonstrating a linear growth in performance as the cluster expands. The accompanying graph illustrates this linear scalability, showing how YugabyteDB's transaction throughput—quantified in tpmC increases in direct proportion to the number of nodes added to the cluster. - -![Horizontal scaling](/images/benchmark/tpcc-horizontal.png) - -{{}} -To see how effectively YugabyteDB handles the TPC-C workload while scaling out, see [Testing horizontal scaling](horizontal-scaling/). -{{}} - -## High scale workloads - -YugabyteDB's robust performance in the TPC-C benchmark, particularly when scaled to a high number of warehouses, serves as a compelling testament to its prowess in handling high-volume transaction processing workloads. By excelling in this industry-standard test, which simulates complex, concurrent transactions across a vast, distributed dataset, YugabyteDB has effectively demonstrated its ability to manage the intense demands of large-scale OLTP environments. - -{{}} -To see how well YugabyteDB handles extremely high workloads, see [Testing high scale workloads](high-scale-workloads/). -{{}} - -## Max scale tested - -In our testing, YugabyteDB was able to process 1M tpmC with 150,000 warehouses at an efficiency of 99.8% on an RF3 cluster of 75 c5d.12xlarge machines with a total data size of 50TB. - -{{}} -The 150K warehouses benchmark was run on [v2.11](/preview/releases/ybdb-releases/end-of-life/v2.11/). -{{}} - -| Warehouses | TPMC | Efficiency(%) | Nodes | Connections | New Order Latency | Machine Type (vCPUs) | -| ---------: | :--- | :-----------: | :---: | ----------- | :---------------: | :--------------------- | -| 150,000 | 1M | 99.30 | 75 | 9000 | 123.33 ms | c5d.12xlarge (48) | - -{{}} -To know more about this accomplishment, see [Largest benchmark](./high-scale-workloads/#largest-benchmark). 
-{{}} diff --git a/docs/content/preview/best-practices-operations/_index.md b/docs/content/preview/best-practices-operations/_index.md deleted file mode 100644 index 44ea2ad33d75..000000000000 --- a/docs/content/preview/best-practices-operations/_index.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Best practices for operations -headerTitle: Best practices -linkTitle: Best practices -description: Tips and tricks to run YugabyteDB deployments -headcontent: Tips and tricks to run YugabyteDB deployments -menu: - preview: - identifier: best-practices-operations - parent: launch-and-manage - weight: 80 -type: indexpage ---- - -{{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/contribute/core-database/_index.md b/docs/content/preview/contribute/core-database/_index.md deleted file mode 100644 index a1279787b388..000000000000 --- a/docs/content/preview/contribute/core-database/_index.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Contribute to the core database -headerTitle: Contribute to the core database -linkTitle: Core database -description: Contribute to the core database -image: fa-thin fa-rectangle-terminal -headcontent: How to contribute code to the core database -menu: - preview: - identifier: core-database - parent: contribute - weight: 2910 -type: indexpage ---- - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/contribute/core-database/checklist.md b/docs/content/preview/contribute/core-database/checklist.md deleted file mode 100644 index db6cafae3893..000000000000 --- a/docs/content/preview/contribute/core-database/checklist.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Contribution checklist -headerTitle: Contribution checklist -linkTitle: Contribution checklist -description: Review the steps to start contributing code and documentation. -headcontent: Checklist for contributing to the core database. -menu: - preview: - identifier: contribute-checklist - parent: core-database - weight: 2911 -type: docs ---- - -## Step 1. Build the source - -* First, clone [the YugabyteDB GitHub repo](https://github.com/yugabyte/yugabyte-db). - - ```bash - git clone https://github.com/yugabyte/yugabyte-db.git - ``` - -* Next, [build the source code](../build-from-src-almalinux). -* Optionally, you may want to [run the unit tests](../build-and-test#test). - -## Step 2. Start a local cluster - -Having built the source, you can [start a local cluster](/preview/quick-start/macos/). - -## Step 3. Make the change - -You should now make your change, recompile the code and test out your change. - -{{< note title="Note" >}} - -You should read the [code style guide](../coding-style). - -{{< /note >}} - -## Step 4. Add unit tests - -Depending on the change, you should add unit tests to make sure they do not break as the codebase is modified. - -## Step 5. Re-run unit tests - -Re-run the unit tests with your changes and make sure all tests pass. - -## Step 6. Submit a pull request - -Congratulations on the change! You should now submit a pull request for a code review and leave a message on the Slack channel. Once the code review passes, your code will get merged in. 
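For reference, a minimal sketch of the Git workflow for that last step might look like the following; the branch name and commit message are illustrative only:

```sh
# Create a feature branch for the change (name is illustrative)
git checkout -b my-fix-branch

# Stage and commit the change with a descriptive message
git add .
git commit -m "Describe the change and reference the related GitHub issue"

# Push the branch to your fork, then open the pull request on GitHub
git push origin my-fix-branch
```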
diff --git a/docs/content/preview/contribute/docs/_index.md b/docs/content/preview/contribute/docs/_index.md deleted file mode 100644 index 7902bb647373..000000000000 --- a/docs/content/preview/contribute/docs/_index.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: Contribute to the documentation -headerTitle: Contribute to the documentation -linkTitle: Documentation -description: Contribute to the documentation -image: fa-thin fa-books -headcontent: How to contribute to the YugabyteDB documentation -menu: - preview: - identifier: docs - parent: contribute - weight: 2910 -type: indexpage ---- - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/contribute/docs/docs-layout.md b/docs/content/preview/contribute/docs/docs-layout.md deleted file mode 100644 index 957cc85d6b9a..000000000000 --- a/docs/content/preview/contribute/docs/docs-layout.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: Find the right page or section -headerTitle: Find the right page or section -linkTitle: Docs layout -description: Find the right location in the YugabyteDB docs -menu: - preview: - identifier: docs-layout - parent: docs - weight: 2912 -type: docs ---- - -The YugabyteDB docs are divided into several sections: - -* [**YugabyteDB Core**](/preview/) is the overview documentation for YugabyteDB -* [**YugabyteDB Anywhere**](/preview/yugabyte-platform/) documents YugabyteDB Anywhere -* [**YugabyteDB Aeon**](/preview/yugabyte-cloud/) documents YugabyteDB Aeon -* [**Releases**](/preview/releases/) contains release notes and other information related to releases -* [**Integrations**](/preview/integrations/) documents third-party integrations -* [**Reference**](/preview/reference/configuration/) contains detailed reference and architecture information about functions, features, and interfaces -* [**FAQ**](/preview/faq/general/) contains frequently-asked questions on a variety of topics -* [**Misc**](/preview/legal/) contains legal information and the (deprecated) YEDIS subsystem - -### YugabyteDB core docs - -The [core docs](/preview/) are landing pages with overview and getting-started information. Pages in this section should have a high-level overview of a feature, what's supported, limitations, and a link to any roadmap GitHub issues. These docs pages can have "Further reading" sections, where you can add links to blogs or Reference section docs as appropriate. - -#### Explore section - -Think of the pages in the [Explore section](/preview/explore/) as a self-guided tour of YugabyteDB. When you're reading a page in this section, you should be able to get a good sense of how the feature works. The page may not answer every question you have, but should point you to the reference page where you can find that information. - -### Reference docs - -Reference docs should be comprehensive and, above all, accurate. This applies to other doc types, but is especially important for reference docs, as they should be the ultimate source of truth for users. - -Here are some examples of reference docs in our documentation: - -* [Replication in DocDB](/preview/architecture/docdb-replication/replication/) -* SQL reference sample: [CREATE TABLE [YSQL]](/preview/api/ysql/the-sql-language/statements/ddl_create_table/) - -### Design docs on GitHub - -We also have design docs [in GitHub](https://github.com/yugabyte/yugabyte-db/tree/master/architecture/design). These design docs should be referenced from the Reference section in the docs. 
- -## Legend for illustrations - -Many of the illustrations in the docs use the following legend to represent tablet leaders and followers, cloud regions and zones, and applications. - -![Legend for illustrations](/images/develop/global-apps/global-database-legend.png) - -## Next steps - -Now that you know where your page should go, [build the docs](../docs-build/) locally and [start editing](../docs-edit/). diff --git a/docs/content/preview/deploy/_index.md b/docs/content/preview/deploy/_index.md deleted file mode 100644 index a1f7b3091352..000000000000 --- a/docs/content/preview/deploy/_index.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Deploy -headerTitle: Deploy YugabyteDB -linkTitle: Deploy -description: How to deploy the YugabyteDB database to any public cloud or private data center or Kubernetes. Includes checklist and manual deployment options as well. -headcontent: Deploy to the public cloud, a private data center, or Kubernetes -aliases: - - /deploy/ - - /preview/architecture/layered-architecture/ - - /preview/architecture/overview/ -menu: - preview: - identifier: deploy - parent: launch-and-manage - weight: 10 -type: indexpage ---- - -{{< page-finder/head text="Deploy YugabyteDB" subtle="across different products">}} - {{< page-finder/list icon="/icons/database-hover.svg" text="YugabyteDB" current="" >}} - {{< page-finder/list icon="/icons/server-hover.svg" text="YugabyteDB Anywhere" url="../yugabyte-platform/create-deployments/" >}} - {{< page-finder/list icon="/icons/cloud-hover.svg" text="YugabyteDB Aeon" url="/preview/yugabyte-cloud/cloud-basics/" >}} -{{< /page-finder/head >}} - -{{}} - - {{}} - -{{}} - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/deploy/checklist.md b/docs/content/preview/deploy/checklist.md deleted file mode 100644 index 63f08a9d2d63..000000000000 --- a/docs/content/preview/deploy/checklist.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: Deployment checklist for YugabyteDB clusters -headerTitle: Deployment checklist -linkTitle: Deployment checklist -description: Checklist to review system requirements, configuration details, and so on, when deploying the YugabyteDB database to production or for performance testing. -menu: - preview: - identifier: checklist - parent: deploy - weight: 10 -type: docs ---- - -A YugabyteDB cluster (also referred to as a [universe](../../architecture/key-concepts/#universe)) consists of two distributed services - the [YB-TServer](../../architecture/yb-tserver/) service and the [YB-Master](../../architecture/yb-master/) service. Because the YB-Master service serves the role of the cluster metadata manager, it should be brought up first, followed by the YB-TServer service. To bring up these distributed services, the respective servers (YB-Master or YB-TServer) need to be started across different nodes. There is a number of topics to consider and recommendations to follow when starting these services. - -## Basics - -- YugabyteDB supports both x86 and ARM (aarch64) CPU architectures. -- YugabyteDB is supported on a variety of [operating systems](../../reference/configuration/operating-systems/). For production workloads, the recommended operating systems are AlmaLinux 8 and RHEL 8. -- The appropriate system limits should be set using [ulimit](../manual-deployment/system-config/#set-ulimits) on each node running a YugabyteDB server. -- [chrony](../manual-deployment/system-config#set-up-time-synchronization) should be used to synchronize time among the machines. 
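As a quick sanity check for the last two items, the following commands show the current open-file limit and the chrony synchronization status on a node. This is a minimal sketch; the recommended limit values are listed on the linked system configuration page.

```sh
# Show the per-process open file limit for the current shell
ulimit -n

# Show whether chrony considers the system clock synchronized
chronyc tracking
```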
- -## Replication - -YugabyteDB internally replicates data in a consistent manner using the Raft consensus protocol to survive node failure without compromising data correctness. This distributed consensus replication is applied at a per-shard (also known as tablet) level similar to Google Spanner. - -The replication factor (RF) corresponds to the number of copies of the data. You need at least as many nodes as the RF, which means one node for RF 1, three nodes for RF 3, and so on. With a RF of 3, your cluster can tolerate one node failure. With a RF of 5, it can tolerate two node failures. More generally, if RF is n, YugabyteDB can survive floor((n - 1) / 2) failures without compromising correctness or availability of data. - -See [Fault tolerance](../../architecture/docdb-replication/replication/#fault-tolerance) for more information. - -When deploying a cluster, keep in mind the following: - -- The default replication factor is 3. -- The number of YB-Master servers running in a cluster should match RF. Run each server on a separate machine to prevent losing availability on failures. You need to specify the RF using the `--replication_factor` flag when bringing up the YB-Master servers. -- The number of YB-TServer servers running in the cluster should not be less than the RF. Run each server on a separate machine to prevent losing availability on failures. -- An even RF number offers the same fault tolerance as its preceding odd number. For example, both RF 4 and RF 3 can only tolerate the loss of 1 node. So to keep costs low, it's preferable to use an odd RF number. - -Note that YugabyteDB works with both hostnames or IP addresses. The latter are preferred at this point, as they are more extensively tested. - -See the [yb-master command reference](../manual-deployment/start-masters/) for more information. - -## Hardware requirements - -YugabyteDB is designed to run on bare-metal machines, virtual machines (VMs), and containers. - -### CPU and RAM - -You should allocate adequate CPU and RAM. YugabyteDB has adequate defaults for running on a wide range of machines, and has been tested from 2 core to 64 core machines, and up to 200GB RAM. - -**Minimum requirement** - -- 2 cores -- 2GB RAM - -**Production requirement** - -- YCQL - 16+ cores and 32GB+ RAM -- YSQL - 16+ cores and 64GB+ RAM - -Add more CPU (compared to adding more RAM) to improve performance. - -**Additional considerations** - -For typical Online Transaction Processing (OLTP) workloads, YugabyteDB performance improves with more aggregate CPU in the cluster. You can achieve this by using larger nodes or adding more nodes to a cluster. Note that if you do not have enough CPUs, this will manifest itself as higher latencies and eventually dropped requests. - -Memory depends on your application query pattern. Writes require memory but only up to a certain point (for example, 4GB, but if you have a write-heavy workload you may need a little more). Beyond that, more memory generally helps improve the read throughput and latencies by caching data in the internal cache. If you do not have enough memory to fit the read working set, then you will typically experience higher read latencies because data has to be read from disk. Having a faster disk could help in some of these cases. - -YugabyteDB explicitly manages a block cache, and does not need the entire data set to fit in memory. It does not rely on the OS to keep data in its buffers. 
If you provide YugabyteDB sufficient memory, data accessed and present in block cache stays in memory. - -### Memory and tablet limits - -For a cluster with [RF3](../../architecture/key-concepts/#replication-factor-rf), 1000 tablets imply 3000 tablet replicas. If the cluster has three nodes, then each node has on average 1000 tablet replicas. A six node cluster would have on average 500 tablet replicas per-node, and so on. - -Each 1000 tablet replicas on a node impose an overhead of 0.4 vCPUs for Raft heartbeats (assuming a 0.5 second heartbeat interval), and 800 MiB of memory. - -The overhead is proportional to the number of tablet replicas, so 500 tablet replicas would need half as much. - -Additional memory will be required for supporting caches and the like if the tablets are being actively used. We recommend provisioning an extra 6200 MiB of memory for each 1000 tablet replicas on a node to handle these cases; that is, a TServer should have 7000 MiB of RAM allocated to it for each 1000 tablet replicas it may be expected to support. - -You can manually provision the amount of memory each TServer uses by setting the [--memory_limit_hard_bytes](../../reference/configuration/yb-tserver/#memory-limit-hard-bytes) or [--default_memory_limit_to_ram_ratio](../../reference/configuration/yb-tserver/#default-memory-limit-to-ram-ratio) flags. - -#### YSQL - -Manually provisioning is a bit tricky as you need to take into account how much memory the kernel needs as well as the PostgreSQL processes and any Master process that is going to be colocated with the TServer. - -Accordingly, it is recommended that you instead use the [--use_memory_defaults_optimized_for_ysql](../../reference/configuration/yb-tserver/#use-memory-defaults-optimized-for-ysql) flag, which gives good memory division settings for using YSQL optimized for your node's size. - -The flag does the following: - -- Automatically sets memory division flag defaults to provide much more memory for PostgreSQL, and optimized for the node size. For details on memory flag defaults, refer to [Memory division flags](../../reference/configuration/yb-tserver/#memory-division-flags). -- Enforces tablet limits based on available memory. This limits the total number of tablet replicas that a cluster can support. If you try to create a table whose additional tablet replicas would bring the total number of tablet replicas in the cluster over this limit, the create table request is rejected. For more information, refer to [Tablet limits](../../architecture/docdb-sharding/tablet-splitting/#tablet-limits). - -{{< tip title="Tip" >}} - -To view the number of live tablets and the limits, open the **YB-Master UI** (`:7000/`) and click the **Tablet Servers** tab. Under **Universe Summary**, the total number of live tablet replicas is listed as **Active Tablet-Peers**, and the limit as **Tablet Peer Limit**. - -![Tablet limits](/images/admin/master-tablet-limits.png) - -{{< /tip >}} - -(Note that although the default setting is false, when creating a new cluster using yugabyted or YugabyteDB Anywhere, the flag is set to true, unless you explicitly set it to false.) - -Given the amount of RAM devoted to per tablet overhead, it is possible to compute the maximum number of tablet replicas. The following table shows sample values of node RAM versus maximum tablet replicas. You can use these values to estimate how big of a node you will need based on how many tablet replicas per server you want supported. 
- -| total node GiB | max number of tablet replicas | max number of PostgreSQL connections | -| ---: | ---: | ---: | -| 4 | 240 | 30 | -| 8 | 530 | 65 | -| 16 | 1,250 | 130 | -| 32 | 2,700 | 225 | -| 64 | 5,500 | 370 | -| 128 | 11,000 | 550 | -| 256 | 22,100 | 730 | - -These values are approximate because different kernels use different amounts of memory, leaving different amounts of memory for the TServer and thus the per-tablet overhead TServer component. - -Also shown is an estimate of how many PostgreSQL connections that node can handle assuming default PostgreSQL flags and usage. Unusually memory expensive queries or preloading PostgreSQL catalog information will reduce the number of connections that can be supported. - -Thus a 8 GiB node would be expected to be able support 530 tablet replicas and 65 (physical) typical PostgreSQL connections. A cluster of six of these nodes would be able to support 530 \* 2 = 1,060 [RF3](../../architecture/key-concepts/#replication-factor-rf) tablets and 65 \* 6 = 390 typical physical PostgreSQL connections assuming the connections are evenly distributed among the nodes. - -#### YCQL - -If you are not using YSQL, ensure the [use_memory_defaults_optimized_for_ysql](../../reference/configuration/yb-master/#use-memory-defaults-optimized-for-ysql) flag is set to false. This flag optimizes YugabyteDB's memory setup for YSQL, reserving a considerable amount of memory for PostgreSQL; if you are not using YSQL then that memory is wasted when it could be helping improve performance by allowing more data to be cached. - -Note that although the default setting is false, when creating a new cluster using yugabyted or YugabyteDB Anywhere, the flag is set to true, unless you explicitly set it to false. - -### Verify support for SSE2 and SSE4.2 - -YugabyteDB requires the SSE2 instruction set support, which was introduced into Intel chips with the Pentium 4 in 2001 and AMD processors in 2003. Most systems produced in the last several years are equipped with SSE2. - -In addition, YugabyteDB requires SSE4.2. - -To verify that your system supports SSE2, run the following command: - -```sh -cat /proc/cpuinfo | grep sse2 -``` - -To verify that your system supports SSE4.2, run the following command: - -```sh -cat /proc/cpuinfo | grep sse4.2 -``` - -### Disks - -- SSDs (solid state disks) are required. - - - -- Both local or remote attached storage work with YugabyteDB. Because YugabyteDB internally replicates data for fault tolerance, remote attached storage which does its own additional replication is not a requirement. Local disks often offer better performance at a lower cost. -- Multi-disk nodes: - - - Do not use RAID across multiple disks. YugabyteDB can natively handle multi-disk nodes (JBOD). - - Create a data directory on each of the data disks and specify a comma separated list of those directories to the YB-Master and YB-TServer servers via the `--fs_data_dirs` flag. - -- Mount settings: - - - XFS is the recommended filesystem. - - Use the `noatime` setting when mounting the data drives. - - ZFS is not currently supported. - - NFS is not currently supported. - -YugabyteDB does not require any form of RAID, but runs optimally on a JBOD (just a bunch of disks) setup. -YugabyteDB can also leverage multiple disks per node and has been tested beyond 20 TB of storage per node. - -Write-heavy applications usually require more disk IOPS (especially if the size of each record is larger), therefore in this case the total IOPS that a disk can support matters. 
On the read side, if the data does not fit into the cache and data needs to be read from the disk in order to satisfy queries, the disk performance (latency and IOPS) will start to matter. - -YugabyteDB uses per-tablet [size tiered compaction](../../architecture/yb-tserver/). Therefore the typical space amplification in YugabyteDB tends to be in the 10-20% range. - -YugabyteDB stores data compressed by default. The effectiveness of compression depends on the data set. For example, if the data has already been compressed, then the additional compression at the storage layer of YugabyteDB will not be very effective. - -It is recommended to plan for about 20% headroom on each node to allow space for miscellaneous overheads such as temporary additional space needed for compactions, metadata overheads, and so on. - -## Network - -The following is a list of default ports along with the network access required for using YugabyteDB: - -- Each of the nodes in the YugabyteDB cluster must be able to communicate with each other using TCP/IP on the following ports: - - - 7100 for YB-Master RPC communication. - - 9100 for YB-TServer RPC communication. - -- To view the cluster dashboard, you need to be able to navigate to the following ports on the nodes: - - - 7000 for viewing the YB-Master Admin UI. - -- To access the database from applications or clients, the following ports need to be accessible from the applications or CLI: - - - 5433 for YSQL - - 9042 for YCQL - -This deployment uses YugabyteDB [default ports](../../reference/configuration/default-ports/). - -YugabyteDB Anywhere has its own port requirements. Refer to [Networking](../../yugabyte-platform/prepare/networking/). - -## Clock synchronization - -For YugabyteDB to maintain strict data consistency, clock drift and clock skew across all nodes _must_ be tightly controlled and kept within defined bounds. Any deviation can impact node availability, as YugabyteDB prioritizes consistency over availability and will shut down servers if necessary to maintain integrity. Clock synchronization software, such as [NTP](http://www.ntp.org/) or [chrony](https://chrony.tuxfamily.org/), allows you to reduce clock skew and drift by continuously synchronizing system clocks across nodes in a distributed system like YugabyteDB. The following are some recommendations on how to configure clock synchronization. - -### Clock skew - -Set a safe value for the maximum clock skew flag (`--max_clock_skew_usec`) for YB-TServers and YB-Masters when starting the YugabyteDB servers. The recommended value is two times the expected maximum clock skew between any two nodes in your deployment. - -For example, if the maximum clock skew across nodes is expected to be no more than 250 milliseconds, then set the parameter to 500000 (`--max_clock_skew_usec=500000`). - -### Clock drift - -The maximum clock drift on any node should be bounded to no more than 500 PPM (or parts per million). This means that the clock on any node should drift by no more than 0.5 ms per second. Note that 0.5 ms per second is the standard assumption of clock drift in Linux. - -In practice, the clock drift would have to be orders of magnitude higher in order to cause correctness issues. - -## Security checklist - -For a list of best practices, see [security checklist](../../secure/security-checklist/). - -## Public clouds - -YugabyteDB can run on a number of public clouds. - -### Amazon Web Services (AWS) - -- Use the M [instance family](https://aws.amazon.com/ec2/instance-types/). 
-- Recommended type is M6i. Use the higher CPU instance types especially for large YSQL workloads. -- Use gp3 EBS (SSD) disks that are at least 250GB in size, larger if more IOPS are needed. - - Scale up the IOPS as you scale up the size of the disk. - - In YugabyteDB testing, gp3 EBS SSDs provide the best performance for a given cost among the various EBS disk options. -- Avoid running on [T2 instance types](https://aws.amazon.com/ec2/instance-types/t2/). The T2 instance types are burstable instance types. Their baseline performance and ability to burst are governed by CPU credits, which makes it hard to get steady performance. -- Use VPC peering for multi-region deployments and connectivity to S3 object stores. - -### Google Cloud - -- Use the N2 high-CPU instance family. As a second choice, the N2 standard instance family can be used. -- Recommended instance types are `n2-highcpu-16` and `n2-highcpu-32`. -- [Local SSDs](https://cloud.google.com/compute/docs/disks/#localssds) are the preferred storage option, as they provide improved performance over attached disks, but the data is not replicated and can be lost if the node fails. This option is ideal for databases such as YugabyteDB that manage their own replication and can guarantee high availability (HA). For more details on these tradeoffs, refer to [Local vs remote SSDs](../../deploy/kubernetes/best-practices/#local-versus-remote-ssds). - - Each local SSD is 375 GB in size, but you can attach up to eight local SSD devices for 3 TB of total local SSD storage space per instance. -- As a second choice, [remote persistent SSDs](https://cloud.google.com/compute/docs/disks/#pdspecs) perform well. Make sure the size of these SSDs are at least 250GB in size, larger if more IOPS are needed: - - The number of IOPS scale automatically in proportion to the size of the disk. -- Avoid running on f1 or g1 machine families. These are [burstable, shared core machines](https://cloud.google.com/compute/docs/machine-types#sharedcore) that may not deliver steady performance. - -### Azure - -- Use v5 options with 16 vCPU in the Storage Optimized (preferred) or General Purpose VM types. For a busy YSQL instance, use 32 vCPU. -- For an application that cannot tolerate P99 spikes, local SSDs (Storage Optimized instances) are the preferred option. For more details on the tradeoffs, refer to [Local vs remote SSDs](../../deploy/kubernetes/best-practices/#local-versus-remote-ssds). -- If local SSDs are not available, use ultra disks to eliminate expected latency on Azure premium disks. Refer to the Azure [disk recommendations](https://azure.microsoft.com/en-us/blog/azure-ultra-disk-storage-microsoft-s-service-for-your-most-i-o-demanding-workloads/) and Azure documentation on [disk types](https://docs.microsoft.com/en-us/azure/virtual-machines/disks-types) for databases. -- Turn on Accelerated Networking, and use VNet peering for multiple VPCs and connectivity to object stores. 
diff --git a/docs/content/preview/deploy/kubernetes/_index.md b/docs/content/preview/deploy/kubernetes/_index.md deleted file mode 100644 index 8c5933dc9ecf..000000000000 --- a/docs/content/preview/deploy/kubernetes/_index.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Deploy YugabyteDB clusters on Kubernetes -headerTitle: Deploy on Kubernetes -linkTitle: Kubernetes -description: Deploy YugabyteDB clusters natively on Kubernetes with various providers -headcontent: Deploy YugabyteDB natively on Kubernetes -aliases: - - /deploy/kubernetes/ -menu: - preview: - identifier: deploy-kubernetes - parent: deploy - weight: 50 -type: indexpage ---- - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/deploy/kubernetes/best-practices.md b/docs/content/preview/deploy/kubernetes/best-practices.md deleted file mode 100644 index 22be7be08029..000000000000 --- a/docs/content/preview/deploy/kubernetes/best-practices.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Kubernetes best practices -linkTitle: Best practices -description: Best practices -menu: - preview: - identifier: best-practices - parent: deploy-kubernetes - weight: 626 -type: docs ---- - -## Local versus remote SSDs - -Kubernetes provides you with an option of employing remote disks using dynamic provisioning or local storage which has to be preprovisioned. - -Local storage provides great performance, but the data is not replicated and can be lost if the pod is moved to a different node for maintenance operations, or if a node fails. Remote storage has slightly lower performance but the data is resilient to failures. - -The following table summarizes the tradeoffs of local vs remote storage: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Feature | Local SSD storage | Remote SSD storage |
| :------ | :---------------- | :----------------- |
| Provision large disk capacity per node | Depends on cloud-provider | Yes |
| Disk storage resilient to failures or pod movement | No | Yes |
| Performance - latency | Lower | Higher |
| Performance - throughput | Higher | Lower |
| Typical cost characteristics | Lower | Higher |
| Kubernetes provisioning scheme | Pre-provisioned | Dynamic provisioning |
- -While local storage offers higher throughput and lower latency at a lower cost, it comes with significant limitations. Maintenance operations like cluster node pool upgrades, or rolling operations to upgrade software or set flags can cause a pod to lose its local copy and require a full remote bootstrap of all local tablets from its peers. This can be time consuming for large databases. Further, for large clusters, there is always a non-zero risk of another pod movement happening during these maintenance operations, which would cause data loss for a subset of tablets. Therefore, using local storage is not recommended for most production use cases. - - diff --git a/docs/content/preview/deploy/kubernetes/clients.md b/docs/content/preview/deploy/kubernetes/clients.md deleted file mode 100644 index d57c327cbe8f..000000000000 --- a/docs/content/preview/deploy/kubernetes/clients.md +++ /dev/null @@ -1,275 +0,0 @@ ---- -title: Connect Remote Clients to Kubernetes Clusters -headerTitle: Connect Clients to Kubernetes Clusters -linkTitle: Connect Clients -description: Connect remote clients to YugabyteDB clusters deployed within Kubernetes. -menu: - preview: - identifier: clients-kubernetes - parent: deploy-kubernetes - weight: 626 -type: docs ---- - -## Prerequisites - -You must have a YugabyteDB cluster set up according to the [Kubernetes deployment instructions](../../kubernetes/). - -## Connect from within the Kubernetes cluster - -An application that is deployed within the Kubernetes cluster should use the Service DNS name `yb-tservers..svc.cluster.local` to discover server endpoints. This DNS entry has multiple `A` records, one for each YB-TServer pod, so that clients can randomize queries across different endpoints: - -```sh -kubectl --namespace yb-demo get svc/yb-tservers -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 56m -``` - -The following example shows a client that uses the YSQL shell ([ysqlsh](../../../api/ysqlsh/)) to connect: - -```sh -kubectl run ysqlsh-client -it --rm --image yugabytedb/yugabyte-client --command -- ysqlsh -h yb-tservers.yb-demo.svc.cluster.local -``` - -```sql -yugabyte=# CREATE TABLE demo(id INT PRIMARY KEY); -``` - -```output -CREATE TABLE -``` - -The following example shows a client that uses the YCQL shell ([ycqlsh](../../../api/ycqlsh/)) to connect: - -```sh -kubectl run cqlsh-shell -it --rm --image yugabytedb/yugabyte-client --command -- cqlsh yb-tservers.yb-demo.svc.cluster.local 9042 -``` - -```CQL -ycqlsh> CREATE KEYSPACE demo; -ycqlsh> use demo; -ycqlsh:demo> CREATE TABLE t_demo(id INT PRIMARY KEY); -``` - -Note that although tables are [internally sharded](../../../architecture/yb-tserver/) across multiple YB-TServer pods, every YB-TServer pod has the ability to process any query, irrespective of its actual tablet assignment. - -## Connect externally - -An application that is deployed outside the Kubernetes cluster should use the external LoadBalancer IP address to connect to the cluster. 
Connections to the load balancer IP address are randomly routed to one of the YB-TServer pods behind the YB-TServer service: - -```sh -kubectl get svc -n yb-demo -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -yb-master-ui LoadBalancer 10.101.142.48 98.138.219.231 7000:32168/TCP 43h -yb-masters ClusterIP None 7100/TCP,7000/TCP 43h -yb-tserver-service LoadBalancer 10.99.76.181 98.138.219.232 6379:30141/TCP,9042:31059/TCP,5433:30577/TCP 43h -yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 43h -``` - -The following example shows a client that uses the YSQL shell ([ysqlsh](../../../api/ysqlsh/)) to connect: - -```sh -docker run yugabytedb/yugabyte-client ysqlsh -h 98.138.219.232 -``` - -```sql -yugabyte=# CREATE TABLE demo(id INT PRIMARY KEY); -``` - -```output -CREATE TABLE -``` - -The following example shows a client that uses the YCQL shell ([ycqlsh](../../../api/ycqlsh/)) to connect: - -```sh -docker run yugabytedb/yugabyte-client ycqlsh 98.138.219.232 9042 -``` - -```CQL -ycqlsh> CREATE KEYSPACE demo; -ycqlsh> use demo; -ycqlsh:demo> CREATE TABLE t_demo(id INT PRIMARY KEY); -``` - -## Use YB-Master Admin UI - -The YB-Master Admin UI is available at the IP address exposed by the `yb-master-ui` LoadBalancer service at `https://98.138.219.231:7000/`. - -Another option that does not require an external LoadBalancer is to create a tunnel from the local host to the master web server port on the master pod using [kubectl port-forward](https://kubernetes.io/docs/tasks/access-application-cluster/port-forward-access-application-cluster/), as follows: - -```sh -kubectl port-forward pod/yb-master-0 7000:7000 -n yb-demo -``` - -```output -Forwarding from 127.0.0.1:7000 -> 7000 -Forwarding from [::1]:7000 -> 7000 -``` - -## Connect externally to a Minikube cluster - -When the Kubernetes cluster is set up using [Minikube](https://kubernetes.io/docs/setup/learning-environment/minikube/), an external IP address is not available by default for the LoadBalancer endpoints. To enable the load balancer IP address, run the following command `minikube tunnel`: - -```sh -minikube tunnel -``` - -```output -Status: - machine: minikube - pid: 38193 - route: 10.96.0.0/12 -> 192.168.99.100 - minikube: Running - services: [yb-master-ui, yb-tserver-service] - errors: - minikube: no errors - router: no errors - loadbalancer emulator: no errors -``` - -For details, see [LoadBalancer access](https://minikube.sigs.k8s.io/docs/handbook/accessing/#loadbalancer-access). 
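Whichever way the external IP is provided (a cloud load balancer or the Minikube tunnel), you can look it up with a `kubectl` one-liner such as the following, assuming the `yb-demo` namespace used in the preceding examples:

```sh
# Print the external IP assigned to the YB-TServer load balancer service
kubectl get svc yb-tserver-service -n yb-demo \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```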
- -## Connect TLS-secured YugabyteDB cluster deployed by Helm charts - -To start a YugabyteDB cluster with encryption in transit (TLS) enabled, follow the steps at [Google Kubernetes Service (GKE) - Helm Chart](/preview/deploy/kubernetes/single-zone/gke/helm-chart/) and set the flag `tls.enabled=true` in the helm command line, as shown in the following example: - -```shell -helm install yugabyte --namespace yb-demo --name yb-demo --set=tls.enabled=true -``` - -### Connect from within the Kubernetes cluster - -Copy the following `yb-client.yaml` and use the `kubectl create -f yb-client.yaml` command to create a pod with auto-mounted client certificates: - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: yb-client - namespace: yb-demo -spec: - containers: - - name: yb-client - image: yugabytedb/yugabyte-client:latest - env: - - name: SSL_CERTFILE - value: "/root/.yugabytedb/root.crt" - volumeMounts: - - name: yugabyte-tls-client-cert - mountPath: "/root/.yugabytedb/" - volumes: - - name: yugabyte-tls-client-cert - secret: - secretName: yugabyte-tls-client-cert - defaultMode: 256 -``` - -When a client uses the `YSQL shell` ([ysqlsh](../../../api/ysqlsh/)) to connect, you can execute the following command to verify the connection: - -```sh -kubectl exec -n yb-demo -it yb-client -- ysqlsh -h yb-tservers.yb-demo.svc.cluster.local "sslmode=require" -``` - -```output -ysqlsh (15.2-YB-{{}}-b0) -SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off) -Type "help" for help. -``` - -```sql -yugabyte=# \conninfo -``` - -```output -You are connected to database "yugabyte" as user "yugabyte" on host "yb-tservers.yb-demo.svc.cluster.local" at port "5433". -SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off) -``` - -When a client uses the YCQL shell ([ycqlsh](../../../api/ycqlsh/)) to connect, you can execute the following command to verify the connection: - -```sh -kubectl exec -n yb-demo -it yb-client -- ycqlsh yb-tservers.yb-demo.svc.cluster.local 9042 --ssl -``` - -```output -Connected to local cluster at yb-tservers.yb-demo.svc.cluster.local:9042. -[cqlsh 5.0.1 | Cassandra 3.9-SNAPSHOT | CQL spec 3.4.2 | Native protocol v4] -Use HELP for help. -``` - -```CQL -cqlsh> SHOW HOST -``` - -```output -Connected to local cluster at yb-tservers.yb-demo.svc.cluster.local:9042 -``` - -Optionally, you can use the following command to remove the client pod after the operations have been completed: - -```sh -kubectl delete pod yb-client -n yb-demo -``` - -```output -pod "yb-client" deleted -``` - -### Connect externally - -To connect externally to a TLS-enabled YugabyteDB helm cluster, start by downloading the root certificate from the Kubernetes cluster's secrets, as follows: - -```sh -mkdir $(pwd)/certs -kubectl get secret yugabyte-tls-client-cert -n yb-demo -o jsonpath='{.data.root\.crt}' | base64 --decode > $(pwd)/certs/root.crt -``` - -When a client that uses the `YSQL shell` ([ysqlsh](../../../api/ysqlsh/)) to connect, the command to execute specifies the external LoadBalancer IP of the `yb-tserver-service`, as described in [Connect using external clients](../single-zone/oss/helm-chart/#connect-using-external-clients). 
You can verify the connection via the following command: - -```sh -docker run -it --rm -v $(pwd)/certs/:/root/.yugabytedb/:ro yugabytedb/yugabyte-client:latest ysqlsh -h "sslmode=require" -``` - -```output -ysqlsh (15.2-YB-{{}}-b0) -SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off) -Type "help" for help. -``` - -```sh -yugabyte=# \conninfo -``` - -```output -You are connected to database "yugabyte" as user "yugabyte" on host "35.200.205.208" at port "5433". -SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off) -``` - -When a client uses the YCQL shell ([ycqlsh](../../../api/ycqlsh/)) to connect, you can verify the connection by executing the following `docker run` command: - -```sh -docker run -it --rm -v $(pwd)/certs/:/root/.yugabytedb/:ro \ ---env SSL_CERTFILE=/root/.yugabytedb/root.crt yugabytedb/yugabyte-client:latest ycqlsh 9042 --ssl -``` - -```output -ysqlsh (15.2-YB-{{}}-b0) -Connected to local cluster at 35.200.205.208:9042. -[cqlsh 5.0.1 | Cassandra 3.9-SNAPSHOT | CQL spec 3.4.2 | Native protocol v4] -Use HELP for help. -``` - -```CQL -cqlsh> SHOW HOST -``` - -```output -Connected to local cluster at 35.200.205.208:9042. -``` diff --git a/docs/content/preview/deploy/kubernetes/multi-cluster/_index.md b/docs/content/preview/deploy/kubernetes/multi-cluster/_index.md deleted file mode 100644 index f96fd97c7dd1..000000000000 --- a/docs/content/preview/deploy/kubernetes/multi-cluster/_index.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: Deploy on multiple geo-distributed Kubernetes clusters -headerTitle: Deploy on multiple Kubernetes clusters -linkTitle: Multi-cluster -description: Deploy YugabyteDB on multiple geo-distributed Kubernetes clusters. -headcontent: Deploy YugabyteDB natively on multiple Kubernetes clusters. -menu: - preview: - identifier: deploy-kubernetes-mc - parent: deploy-kubernetes - weight: 623 -type: indexpage ---- - -[Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/concepts/types-of-clusters) can be configured to support global DNS across multiple Kubernetes clusters. For example, you can deploy a three-region YugabyteDB cluster on three Kubernetes clusters, each deployed in a different region, using the standard single-zone YugabyteDB Helm chart to deploy one third of the nodes in the database cluster in each of the three clusters. - -{{}} - - {{}} - -{{}} diff --git a/docs/content/preview/deploy/kubernetes/multi-cluster/gke/helm-chart.md b/docs/content/preview/deploy/kubernetes/multi-cluster/gke/helm-chart.md deleted file mode 100644 index d3135991c72f..000000000000 --- a/docs/content/preview/deploy/kubernetes/multi-cluster/gke/helm-chart.md +++ /dev/null @@ -1,534 +0,0 @@ ---- -title: Deploy a multi-region cluster on Google Kubernetes Engine (GKE) using Helm chart -headerTitle: Google Kubernetes Engine (GKE) -linkTitle: Google Kubernetes Engine (GKE) -description: Use Helm chart to deploy a multi-region YugabyteDB cluster that spans three GKE clusters across three regions. -menu: - preview: - parent: deploy-kubernetes-mc - name: Google Kubernetes Engine - identifier: k8s-mc-gke-1 - weight: 628 -aliases: - - /preview/deploy/kubernetes/multi-cluster/gke -type: docs ---- - - - -You can deploy a single multi-region YugabyteDB cluster that spans three [GKE](https://cloud.google.com/kubernetes-engine/docs/) clusters, each running in a different region. 
Each region also has an internal DNS load balancer set to [global access](https://cloud.google.com/kubernetes-engine/docs/how-to/internal-load-balancing#global_access). This configuration allows pods in one GKE cluster to discover pods in another GKE cluster without exposing any of the DNS information to the world outside your GKE project. - -In the example provided in this document, you will use the standard single-zone YugabyteDB Helm chart to deploy one third of the nodes in the database cluster in each of the three GKE clusters. - -## Prerequisites - -You must have three GKE clusters with Helm configured. If you have not installed the Helm client (`helm`), see [Install Helm](https://helm.sh/docs/intro/install/). - -The YugabyteDB Helm chart has been tested with the following software versions: - -- GKE running Kubernetes 1.20 or later with nodes such that a total of 12 CPU cores and 45 GB RAM can be allocated to YugabyteDB. This can be three nodes with 4 CPU core and 15 GB RAM allocated to YugabyteDB. `n1-standard-8` is the minimum instance type that meets these criteria. -- Helm 3.4 or later. -- YugabyteDB Docker image (yugabytedb/yugabyte) 2.1.0 or later. -- For optimal performance, ensure you have set the appropriate [system limits using `ulimit`](../../../../manual-deployment/system-config/#set-ulimits) on each node in your Kubernetes cluster. - -The following steps show how to meet these prerequisites: - -- Download and install the [Google Cloud SDK](https://cloud.google.com/sdk/downloads/). - -- Configure defaults for `gcloud`. - -- Set the project ID to `yugabyte`. You can change this as per your need. - - ```sh - gcloud config set project yugabyte - ``` - -- Install the `kubectl`command line tool by running the following command: - - ```sh - gcloud components install kubectl - ``` - - Note that GKE is usually two or three major releases behind the upstream or OSS Kubernetes release. This means you have to make sure that you have the latest `kubectl` version that is compatible across different Kubernetes distributions. - -- Ensure `helm` is installed by using the Helm version command: - - ```sh - helm version - ``` - - Expect an output similar to the following output: - - ```output - version.BuildInfo{Version:"v3.0.3", GitCommit:"ac925eb7279f4a6955df663a0128044a8a6b7593", GitTreeState:"clean", GoVersion:"go1.13.6"} - ``` - - Note that the `tiller` server side component has been removed in Helm 3. - -## Create GKE clusters - -You start by creating clusters and then storage classes per zone. - -### Create clusters - -The following commands create three Kubernetes clusters in three different regions (`us-west1`, `us-central1`, `us-east1`), with one node in each cluster, therefore generating a multi-region multi-cluster Kubernetes configuration: - -```sh -gcloud beta container clusters create yugabytedb1 \ - --machine-type=n1-standard-8 \ - --num-nodes 1 \ - --zone us-west1-b \ - --release-channel rapid -``` - -```sh -gcloud beta container clusters create yugabytedb2 \ - --machine-type=n1-standard-8 \ - --num-nodes 1 \ - --zone us-central1-b \ - --release-channel rapid -``` - -```sh -gcloud beta container clusters create yugabytedb3 \ - --machine-type=n1-standard-8 \ - --num-nodes 1 \ - --zone us-east1-b \ - --release-channel rapid -``` - -Use the following command to confirm that Kubernetes contexts have been created: - -```sh -kubectl config get-contexts -``` - -```output -CURRENT NAME CLUSTER ... 
- gke_yugabyte_us-central1-b_yugabytedb2 gke_yugabyte_us-central1-b_yugabytedb2 -* gke_yugabyte_us-east1-b_yugabytedb3 gke_yugabyte_us-east1-b_yugabytedb3 - gke_yugabyte_us-west1-b_yugabytedb1 gke_yugabyte_us-west1-b_yugabytedb1 -``` - -Note that [Global access](https://cloud.google.com/kubernetes-engine/docs/how-to/internal-load-balancing#global_access) on load balancers is currently in beta and is available only on GKE clusters created using the `rapid` release channel. - -### Create a storage class per zone - -You need to ensure that the storage classes used by the pods in a given zone are always pinned to that zone only. - -Add the following contents to a file named `gke-us-west1-b.yaml`: - -```yaml -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: standard-us-west1-b -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard - replication-type: none - zone: us-west1-b -``` - -Add the following contents to a file named `gke-us-central1-b.yaml`. - -```yaml -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: standard-us-central1-b -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard - replication-type: none - zone: us-central1-b -``` - -Add the following contents to a file named `gke-us-east1-b.yaml`. - -```yaml -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: standard-us-east1-b -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard - replication-type: none - zone: us-east1-b -``` - -Apply the preceding configuration to your clusters, as follows: - -```sh -kubectl apply -f gke-us-west1-b.yaml --context gke_yugabyte_us-west1-b_yugabytedb1 -``` - -```sh -kubectl apply -f gke-us-central1-b.yaml --context gke_yugabyte_us-central1-b_yugabytedb2 -``` - -```sh -kubectl apply -f gke-us-east1-b.yaml --context gke_yugabyte_us-east1-b_yugabytedb3 -``` - -## Set up global DNS - -Set up a global DNS system across all three GKE clusters so that pods in one cluster can connect to pods in another cluster. - -### Create load balancer configuration for kube-dns - -The following YAML file adds an internal load balancer (which is not exposed outside its own Google Cloud region) to Kubernetes built-in `kube-dns` deployment. By default, the `kube-dns` deployment is accessed only by a `ClusterIP` and not a load balancer. You need to allow this load balancer to be [globally accessible](https://cloud.google.com/kubernetes-engine/docs/how-to/internal-load-balancing#global_access) so that each such load balancer is visible to two other load balancers in the other two regions. - -Add the following contents to a file named `yb-dns-lb.yaml`: - -```yaml -apiVersion: v1 -kind: Service -metadata: - annotations: - cloud.google.com/load-balancer-type: "Internal" - networking.gke.io/internal-load-balancer-allow-global-access: "true" - labels: - k8s-app: kube-dns - name: kube-dns-lb - namespace: kube-system -spec: - ports: - - name: dns - port: 53 - protocol: UDP - targetPort: 53 - selector: - k8s-app: kube-dns - sessionAffinity: None - type: LoadBalancer -``` - -Note that using external load balancers for this purpose is possible but not recommended from a security perspective (the DNS information for all the clusters would be available for access on the public Internet). 
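If you want to verify the manifest on one cluster before running the automated setup described in the next section, applying it manually would look something like the following; the context name comes from the clusters created earlier:

```sh
# Apply the internal kube-dns load balancer service to a single cluster
kubectl apply -f yb-dns-lb.yaml --context gke_yugabyte_us-west1-b_yugabytedb1
```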
- -### Apply the configuration to every cluster - -Execute the following command to download the `yb-multiregion-k8s-setup.py` script to automate the setup of the load balancers: - -```sh -wget https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/cloud/kubernetes/yb-multiregion-k8s-setup.py -``` - -The script starts out by creating a new namespace in each of the three clusters. Thereafter, it creates three internal load balancers for `kube-dns` in the three clusters. After the load balancers are created, it configures them using Kubernetes ConfigMap in such a way that they forward DNS requests for zone-scoped namespaces to the relevant Kubernetes cluster's DNS server. Finally, it deletes the `kube-dns` pods to allow Kubernetes to bring them back up automatically with the new configuration. - -Open the `yb-multiregion-k8s-setup.py` script and edit the `contexts` and `regions` sections to reflect your own configuration, as follows: - -```python -# Replace this with your own kubernetes cluster contexts -contexts = { - 'us-west1-b': 'gke_yugabyte_us-west1-b_yugabytedb1', - 'us-central1-b': 'gke_yugabyte_us-central1-b_yugabytedb2', - 'us-east1-b': 'gke_yugabyte_us-east1-b_yugabytedb3', -} - -# Replace this with your own `zone`: `region` names -regions = { - 'us-west1-b': 'us-west1', - 'us-central1-b': 'us-central1', - 'us-east1-b': 'us-east1', -} -``` - -Run the script using the following command: - -```sh -python yb-multiregion-k8s-setup.py -``` - -```output -namespace/yb-demo-us-east1-b created -service/kube-dns-lb created -namespace/yb-demo-us-central1-b created -service/kube-dns-lb created -namespace/yb-demo-us-west1-b created -service/kube-dns-lb created -DNS endpoint for zone us-east1-b: 10.142.15.197 -DNS endpoint for zone us-central1-b: 10.128.15.215 -DNS endpoint for zone us-west1-b: 10.138.15.237 -pod "kube-dns-68b499d58-wn5zv" deleted -pod "kube-dns-68b499d58-h2m28" deleted -pod "kube-dns-68b499d58-4jl89" deleted -``` - -We now have three GKE clusters that essentially have a global DNS service as long as services use zone-scoped namespaces to access each other. - -## Create a YugabyteDB cluster - -You start by adding the Helm charts repository, then creating override files, and then proceeding to download YugabyteDB. - -### Add charts repository - -To add the YugabyteDB charts repository, run the following command: - -```sh -helm repo add yugabytedb https://charts.yugabyte.com -``` - -Make sure that you have the latest updates to the repository by running the following command: - -```sh -helm repo update -``` - -Validate that you have the updated chart version, as follows: - -```sh -helm search repo yugabytedb/yugabyte --version {{}} -``` - -```output -NAME CHART VERSION APP VERSION DESCRIPTION -yugabytedb/yugabyte {{}} {{}} YugabyteDB is the high-performance distributed ... 
-``` - -### Create override files - -Add the following contents to a file named `overrides-us-west1-b.yaml`: - -```yaml -isMultiAz: True - -AZ: us-west1-b - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-west1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east1-b.svc.cluster.local:7100" - -storage: - master: - storageClass: "standard-us-west1-b" - tserver: - storageClass: "standard-us-west1-b" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "gke" - placement_region: "us-west1" - placement_zone: "us-west1-b" - leader_failure_max_missed_heartbeat_periods: 10 - tserver: - placement_cloud: "gke" - placement_region: "us-west1" - placement_zone: "us-west1-b" - leader_failure_max_missed_heartbeat_periods: 10 -``` - -Add the following contents to a file named `overrides-us-central1-b.yaml`: - -```yaml -isMultiAz: True - -AZ: us-central1-b - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-west1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east1-b.svc.cluster.local:7100" - -storage: - master: - storageClass: "standard-us-central1-b" - tserver: - storageClass: "standard-us-central1-b" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-b" - leader_failure_max_missed_heartbeat_periods: 10 - tserver: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-b" - leader_failure_max_missed_heartbeat_periods: 10 -``` - -Add the following contents to a file named `overrides-us-east1-b.yaml`: - -```yaml -isMultiAz: True - -AZ: us-east1-b - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-west1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east1-b.svc.cluster.local:7100" - -storage: - master: - storageClass: "standard-us-east1-b" - tserver: - storageClass: "standard-us-east1-b" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "gke" - placement_region: "us-east1" - placement_zone: "us-east1-b" - leader_failure_max_missed_heartbeat_periods: 10 - tserver: - placement_cloud: "gke" - placement_region: "us-east1" - placement_zone: "us-east1-b" - leader_failure_max_missed_heartbeat_periods: 10 -``` - -### Install YugabyteDB - -Create the YugabyteDB cluster such that one third of the nodes are hosted in each Kubernetes cluster, as follows: - -```sh -helm install yb-demo-us-west1-b yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-west1-b \ - -f overrides-us-west1-b.yaml \ - --kube-context gke_yugabyte_us-west1-b_yugabytedb1 --wait -``` - -```sh -helm install yb-demo-us-central1-b yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-central1-b \ - -f overrides-us-central1-b.yaml \ - --kube-context gke_yugabyte_us-central1-b_yugabytedb2 --wait -``` - -```sh -helm install yb-demo-us-east1-b yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-east1-b \ - -f overrides-us-east1-b.yaml \ - --kube-context gke_yugabyte_us-east1-b_yugabytedb3 --wait -``` - -## Check the cluster status - -There is a number of commands that you can execute to check the status of the cluster. 
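For example, you can first confirm that each per-cluster Helm release reports a `deployed` status. This is a sketch that assumes the release names, namespaces, and contexts used earlier in this document:

```sh
# Check the status of each per-cluster Helm release.
helm status yb-demo-us-west1-b -n yb-demo-us-west1-b --kube-context gke_yugabyte_us-west1-b_yugabytedb1
helm status yb-demo-us-central1-b -n yb-demo-us-central1-b --kube-context gke_yugabyte_us-central1-b_yugabytedb2
helm status yb-demo-us-east1-b -n yb-demo-us-east1-b --kube-context gke_yugabyte_us-east1-b_yugabytedb3
```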
- -Check the pods, as follows: - -```sh -kubectl get pods -n yb-demo-us-west1-b --context gke_yugabyte_us-west1-b_yugabytedb1 -``` - -```sh -kubectl get pods -n yb-demo-us-central1-b --context gke_yugabyte_us-central1-b_yugabytedb2 -``` - -```sh -kubectl get pods -n yb-demo-us-east1-b --context gke_yugabyte_us-east1-b_yugabytedb3 -``` - -Check the services, as follows: - -```sh -kubectl get services -n yb-demo-us-west1-b --context gke_yugabyte_us-west1-b_yugabytedb1 -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -yb-master-ui LoadBalancer 10.31.250.228 35.185.207.11 7000:31185/TCP 91m -yb-masters ClusterIP None 7100/TCP,7000/TCP 91m -yb-tserver-service LoadBalancer 10.31.247.185 34.83.192.162 6379:31858/TCP,9042:30444/TCP,5433:30854/TCP 91m -yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 91m -``` - -```sh -kubectl get services -n yb-demo-us-central1-b --context gke_yugabyte_us-central1-b_yugabytedb2 -``` - -```sh -kubectl get services -n yb-demo-us-east1-b --context gke_yugabyte_us-east1-b_yugabytedb3 -``` - -Access the YB-Master Admin UI for the cluster at `http://:7000` where `external-ip` refers to one of the `yb-master-ui` services. Note that you can use any of the three services for this purpose since all of them show the same cluster metadata. - -![mz-ybmaster](/images/deploy/kubernetes/gke-multicluster-ybmaster.png) - -## Configure the region-aware replica placement - -The default replica placement policy treats every YB-TServer as equal, irrespective of its `placement_*` flags. To confirm that the default configuration is still in effect, navigate to `http://:7000/cluster-config` and expect to see the following: - -![before-regionaware](/images/deploy/kubernetes/gke-multicluster-before-regionaware.png) - -Run the following command to make the replica placement region-aware or cluster-aware so that one replica is placed on each region or cluster: - -```sh -kubectl exec -it -n yb-demo-us-west1-b --context gke_yugabyte_us-west1-b_yugabytedb1 yb-master-0 -- bash \ --c "/home/yugabyte/master/bin/yb-admin --master_addresses yb-master-0.yb-masters.yb-demo-us-west1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east1-b.svc.cluster.local:7100 modify_placement_info gke.us-west1.us-west1-b,gke.us-central1.us-central1-b,gke.us-east1.us-east1-b 3" -``` - -To view the new configuration, navigate to `http://:7000/cluster-config` and expect to see the following: - -![after-regionaware](/images/deploy/kubernetes/gke-multicluster-after-regionaware.png) - -## Connect using YSQL and YCQL shells - -To connect and use the YSQL Shell (ysqlsh), run the following command: - -```sh -kubectl exec -n yb-demo-us-west1-b --context gke_yugabyte_us-west1-b_yugabytedb1 \ - -it yb-tserver-0 -- ysqlsh -h yb-tserver-0.yb-tservers.yb-demo-us-west1-b -``` - -To connect and use the YCQL Shell (ycqlsh), run the following command: - -```sh -kubectl exec -n yb-demo-us-west1-b --context gke_yugabyte_us-west1-b_yugabytedb1 \ --it yb-tserver-0 -- ycqlsh yb-tserver-0.yb-tservers.yb-demo-us-west1-b -``` - -Follow the instructions provided in [Explore YSQL](/preview/quick-start/explore/ysql/) and then browse to `http://:7000/tablet-servers` of the YB-Master Admin UI to confirm that tablet peers and their leaders are placed evenly across all three zones for both user data and system data, as per the following illustration: - -![mz-ybtserver](/images/deploy/kubernetes/gke-multicluster-ybtserver.png) - -## 
Connect using external clients - -To connect an external program, get the load balancer `EXTERNAL-IP` address of the `yb-tserver-service` service and connect using port 5433 for YSQL or port 9042 for YCQL, as follows: - -```sh -kubectl get services -n yb-demo-us-west1-b --context gke_yugabyte_us-west1-b_yugabytedb1 -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -... -yb-tserver-service LoadBalancer 10.31.247.185 34.83.192.162 6379:31858/TCP,9042:30444/TCP,5433:30854/TCP 91m -... -``` - -## Test the YugabyteDB cluster resilience - -You can test the resilience of the cluster when it is subjected to the complete failure of one region. To simulate such a failure, set the replica count of the YugabyteDB StatefulSets to `0` for the `us-central1` region, as follows: - -```sh -kubectl scale statefulset yb-tserver --replicas=0 -n yb-demo-us-central1-b \ - --context gke_yugabyte_us-central1-b_yugabytedb2 - -kubectl scale statefulset yb-master --replicas=0 -n yb-demo-us-central1-b \ - --context gke_yugabyte_us-central1-b_yugabytedb2 -``` - -Now rerun the queries from [Connect using YSQL and YCQL shells](#connect-using-ysql-and-ycql-shells) after reconnecting to the nodes in the `us-west1` region to see that there is no impact to the availability of the cluster and the data stored therein. However, there is higher latency for some of the transactions, since the farthest `us-east1` region has to be involved in the write path. In other words, the database cluster is fully protected against region failures but may temporarily experience higher latency, which is a better outcome than a complete outage of the business-critical database service. See [Understanding How YugabyteDB Runs on Kubernetes](https://www.yugabyte.com/blog/understanding-how-yugabyte-db-runs-on-kubernetes/) for details on how YugabyteDB self-heals the replicas when subjected to the failure of a fault domain (the cloud region, in this case) by auto-electing a new leader for each of the impacted shards in the remaining fault domains. The cluster goes back to its original configuration as soon as the nodes in the lost region become available again. diff --git a/docs/content/preview/deploy/kubernetes/multi-zone/_index.md b/docs/content/preview/deploy/kubernetes/multi-zone/_index.md deleted file mode 100644 index d8a9d31766c5..000000000000 --- a/docs/content/preview/deploy/kubernetes/multi-zone/_index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Deploy YugabyteDB on a multi-zone Kubernetes cluster -linkTitle: Multi-zone -headerTitle: Multi-zone Kubernetes -description: Deploy YugabyteDB on multi-zone Kubernetes -headcontent: Deploy YugabyteDB on a multi-zone Kubernetes cluster -menu: - preview: - identifier: deploy-kubernetes-mz - parent: deploy-kubernetes - weight: 622 -type: indexpage ---- - -[Amazon Elastic Kubernetes Service](https://docs.aws.amazon.com/eks/latest/userguide/network_reqs.html) and [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/concepts/types-of-clusters) support multi-zone Kubernetes clusters automatically. The following instructions describe how to deploy a 3-zone YugabyteDB cluster on a 3-zone Kubernetes cluster. Both these deployments use the standard single-zone YugabyteDB Helm Chart to deploy one third of the nodes in the database cluster in each of the 3 zones. 
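Before deploying, you may want to confirm that the Kubernetes nodes actually span three zones. The following is a sketch; it assumes the standard `topology.kubernetes.io/zone` node label (older clusters may use `failure-domain.beta.kubernetes.io/zone` instead):

```sh
# List nodes along with the zone each node runs in.
kubectl get nodes --label-columns=topology.kubernetes.io/zone
```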
- -{{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/deploy/kubernetes/multi-zone/eks/helm-chart.md b/docs/content/preview/deploy/kubernetes/multi-zone/eks/helm-chart.md deleted file mode 100644 index f38ab018e0a7..000000000000 --- a/docs/content/preview/deploy/kubernetes/multi-zone/eks/helm-chart.md +++ /dev/null @@ -1,366 +0,0 @@ ---- -title: Deploy multi zone on EKS using Helm Chart -headerTitle: Amazon Elastic Kubernetes Service (EKS) -linkTitle: Amazon Elastic Kubernetes Service (EKS) -description: Deploy a multi-zone YugabyteDB cluster on Amazon Elastic Kubernetes Service (EKS) using Helm Chart. -menu: - preview: - parent: deploy-kubernetes-mz - name: Amazon EKS - identifier: k8s-mz-eks-1 - weight: 627 -aliases: - - /preview/deploy/kubernetes/multi-zone/eks -type: docs ---- - - - -## Prerequisites - -You must have an Amazon EKS cluster that has Helm configured. Note that Amazon EKS clusters are deployed across multiple zones by default. If you have not installed the Helm client (`helm`), see [Installing Helm](https://helm.sh/docs/intro/install/). - -The YugabyteDB Helm chart has been tested with the following software versions: - -- Amazon EKS running Kubernetes 1.18 (or later) with nodes such that a total of 12 CPU cores and 45 GB RAM can be allocated to YugabyteDB. This can be three nodes with 4 CPU cores and 15 GB RAM allocated to YugabyteDB. `m5.2xlarge` is the minimum AWS EC2 instance type that meets these criteria. -- Helm 3.4 or later -- YugabyteDB Docker image (yugabytedb/yugabyte) 2.1.0 or later -- For optimal performance, ensure you have set the appropriate [system limits using `ulimit`](../../../../manual-deployment/system-config/#set-ulimits) on each node in your Kubernetes cluster. - -The following steps show how to meet these prerequisites. - -- Install and configure the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/installing.html). - -- Install [`eksctl`](https://eksctl.io/) - -`eksctl` is a basic command line utility for creating and managing Amazon EKS clusters. Detailed instructions for installing eksctl based on the OS of your choice are available at [Getting Started with eksctl](https://docs.aws.amazon.com/eks/latest/userguide/getting-started-eksctl.html). The following instructions apply to macOS. - -```sh -$ brew tap weaveworks/tap -$ brew install weaveworks/tap/eksctl -``` - -Test that your installation was successful. - -```sh -$ eksctl version -``` - -- Install and configure `kubectl` for Amazon EKS - -You have multiple options to download and install `kubectl` for your OS. Note that Amazon EKS also vends kubectl binaries that you can use, which are identical to the upstream kubectl binaries of the same version. To install the Amazon EKS-vended binary for your operating system, see [Installing kubectl](https://docs.aws.amazon.com/eks/latest/userguide/install-kubectl.html). - -- Ensure `helm` is installed - -First, check to see if Helm is installed by using the Helm version command. - -```sh -$ helm version -``` - -You should see something similar to the following output. Note that the `tiller` server side component has been removed in Helm 3. - -```output -version.BuildInfo{Version:"v3.0.3", GitCommit:"ac925eb7279f4a6955df663a0128044a8a6b7593", GitTreeState:"clean", GoVersion:"go1.13.6"} -``` - -## 1. Create an EKS cluster - -### Create EKS cluster - -Create an EKS cluster, if you have not already done so, by running the following command.
Note that if you do not specify 3 zones in the `zones` parameter explicitly then EKS may place the 3 nodes in [only 2 zones](https://docs.aws.amazon.com/eks/latest/userguide/network_reqs.html). - -```sh -$ eksctl create cluster \ ---name yb-multizone \ ---version 1.14 \ ---region us-east-1 \ ---zones us-east-1a,us-east-1b,us-east-1c \ ---nodegroup-name standard-workers \ ---node-type m5.2xlarge \ ---nodes 3 \ ---nodes-min 1 \ ---nodes-max 4 \ ---managed -``` - -As stated in the Prerequisites section, the default configuration in the YugabyteDB Helm Chart requires Kubernetes nodes to have a total of 12 CPU cores and 45 GB RAM allocated to YugabyteDB. This can be three nodes with 4 CPU cores and 15 GB RAM allocated to YugabyteDB. The smallest AWS instance type that meets this requirement is `m5.2xlarge` which has 8 CPU cores and 32 GB RAM. - -### Create a storage class - -We need to specify `WaitForFirstConsumer` mode for the volumeBindingMode so that volumes will be provisioned according to pods' zone affinities. - -Copy the contents below to a file named `storage.yaml`. - -```yaml -kind: StorageClass -metadata: - name: yb-storage -apiVersion: storage.k8s.io/v1 -allowVolumeExpansion: true -provisioner: kubernetes.io/aws-ebs -volumeBindingMode: WaitForFirstConsumer -parameters: - type: gp2 - fsType: xfs -``` - -Apply the above configuration to your cluster. - -```sh -$ kubectl apply -f storage.yaml -``` - -## 2. Create a YugabyteDB cluster - -### Add charts repository - -To add the YugabyteDB charts repository, run the following command. - -```sh -$ helm repo add yugabytedb https://charts.yugabyte.com -``` - -Make sure that you have the latest updates to the repository by running the following command. - -```sh -$ helm repo update -``` - -Validate that you have the updated chart version. - -```sh -$ helm search repo yugabytedb/yugabyte --version {{}} -``` - -```output -NAME CHART VERSION APP VERSION DESCRIPTION -yugabytedb/yugabyte {{}} {{}} YugabyteDB is the high-performance distributed ... -``` - -### Create override files - -Copy the contents below to a file named `overrides-us-east-1a.yaml`. - -```yaml -isMultiAz: True - -AZ: us-east-1a - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-east-1a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1c.svc.cluster.local:7100" - -storage: - master: - storageClass: "yb-storage" - tserver: - storageClass: "yb-storage" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "aws" - placement_region: "us-east-1" - placement_zone: "us-east-1a" - tserver: - placement_cloud: "aws" - placement_region: "us-east-1" - placement_zone: "us-east-1a" -``` - -Copy the contents below to a file named `overrides-us-east-1b.yaml`. - -```yaml -isMultiAz: True - -AZ: us-east-1b - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-east-1a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1c.svc.cluster.local:7100" - -storage: - master: - storageClass: "yb-storage" - tserver: - storageClass: "yb-storage" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "aws" - placement_region: "us-east-1" - placement_zone: "us-east-1b" - tserver: - placement_cloud: "aws" - placement_region: "us-east-1" - placement_zone: "us-east-1b" -``` - -Copy the contents below to a file named `overrides-us-east-1c.yaml`. 
- -```yaml -isMultiAz: True - -AZ: us-east-1c - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-east-1a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1c.svc.cluster.local:7100" - -storage: - master: - storageClass: "yb-storage" - tserver: - storageClass: "yb-storage" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "aws" - placement_region: "us-east-1" - placement_zone: "us-east-1c" - tserver: - placement_cloud: "aws" - placement_region: "us-east-1" - placement_zone: "us-east-1c" -``` - -### Install YugabyteDB - -Install YugabyteDB in the Kubernetes cluster using the commands below. - -For Helm, you have to first create the 3 namespaces. - -```sh -$ kubectl create namespace yb-demo-us-east-1a -$ kubectl create namespace yb-demo-us-east-1b -$ kubectl create namespace yb-demo-us-east-1c -``` - -Now create the overall YugabyteDB cluster in such a way that one third of the nodes are hosted in each zone. - -```sh -$ helm install yb-demo-us-east-1a yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-east-1a \ - -f overrides-us-east-1a.yaml --wait -``` - -```sh -$ helm install yb-demo-us-east-1b yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-east-1b \ - -f overrides-us-east-1b.yaml --wait -``` - -```sh -$ helm install yb-demo-us-east-1c yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-east-1c \ - -f overrides-us-east-1c.yaml --wait -``` - -## 3. Check the cluster status - -You can check the status of the cluster using various commands noted below. - -Check the pods. - -```sh -$ kubectl get pods --all-namespaces -``` - -Check the services. - -```sh -$ kubectl get services --all-namespaces -``` - -```output -NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -default kubernetes ClusterIP 10.100.0.1 443/TCP 20m -kube-system kube-dns ClusterIP 10.100.0.10 53/UDP,53/TCP 20m -yb-demo-us-east-1a yb-master-ui LoadBalancer 10.100.189.217 ad37cdc4667de11ea87920e8fdeea06a-261093161.us-east-1.elb.amazonaws.com 7000:31087/TCP 5m12s -yb-demo-us-east-1a yb-masters ClusterIP None 7100/TCP,7000/TCP 5m12s -yb-demo-us-east-1a yb-tserver-service LoadBalancer 10.100.97.195 ad37e06fb67de11ea87920e8fdeea06a-238172614.us-east-1.elb.amazonaws.com 6379:30334/TCP,9042:31406/TCP,5433:30024/TCP 5m12s -yb-demo-us-east-1a yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 5m12s -yb-demo-us-east-1b yb-master-ui LoadBalancer 10.100.61.215 a2348e9ce67df11ea9fec12feeb58bc1-1248396869.us-east-1.elb.amazonaws.com 7000:31595/TCP 2m58s -yb-demo-us-east-1b yb-masters ClusterIP None 7100/TCP,7000/TCP 2m58s -yb-demo-us-east-1b yb-tserver-service LoadBalancer 10.100.99.202 a2347e74f67df11ea9fec12feeb58bc1-1599278236.us-east-1.elb.amazonaws.com 6379:31292/TCP,9042:30275/TCP,5433:30450/TCP 2m58s -yb-demo-us-east-1b yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 2m58s -yb-demo-us-east-1c yb-master-ui LoadBalancer 10.100.0.232 a6cd5518167df11ea9fec12feeb58bc1-1402605575.us-east-1.elb.amazonaws.com 7000:31678/TCP 55s -yb-demo-us-east-1c yb-masters ClusterIP None 7100/TCP,7000/TCP 55s -yb-demo-us-east-1c yb-tserver-service LoadBalancer 10.100.119.40 a6cd628b667df11ea9fec12feeb58bc1-403831649.us-east-1.elb.amazonaws.com 6379:31544/TCP,9042:31541/TCP,5433:32374/TCP 55s -yb-demo-us-east-1c yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 55s -``` - -Access the yb-master Admin UI for the cluster 
at `http://:7000` where `external-ip` refers to one of the `yb-master-ui` services. Note that you can use any of the above three services for this purpose as all of them will show the same cluster metadata. - -![mz-ybmaster](/images/deploy/kubernetes/aws-multizone-ybmaster.png) - -## 4. Configure zone-aware replica placement - -Default replica placement policy treats every yb-tserver as equal irrespective of its `placement_*` setting. Go to `http://:7000/cluster-config` to confirm that the default configuration is still in effect. - -![before-zoneaware](/images/deploy/kubernetes/gke-aws-multizone-before-zoneaware.png) - -To make the replica placement zone-aware, so that one replica is placed in each zone, run the following command: - -```sh -$ kubectl exec -it -n yb-demo-us-east-1a yb-master-0 -- bash \ --c "/home/yugabyte/master/bin/yb-admin --master_addresses yb-master-0.yb-masters.yb-demo-us-east-1a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-east-1c.svc.cluster.local:7100 modify_placement_info aws.us-east-1.us-east-1a,aws.us-east-1.us-east-1b,aws.us-east-1.us-east-1c 3" -``` - -To see the new configuration, go to `http://:7000/cluster-config`. - -![after-zoneaware](/images/deploy/kubernetes/aws-multizone-after-zoneaware.png) - -## 5. Connect using YugabyteDB shells - -To connect and use the YSQL Shell (ysqlsh), run the following command: - -```sh -$ kubectl exec -n yb-demo-us-east-1a -it yb-tserver-0 -- ysqlsh \ - -h yb-tserver-0.yb-tservers.yb-demo-us-east-1a -``` - -To connect and use the YCQL Shell (ycqlsh), run the following command: - -```sh -$ kubectl exec -n yb-demo-us-east-1a -it yb-tserver-0 -- ycqlsh \ -yb-tserver-0.yb-tservers.yb-demo-us-east-1a -``` - -You can follow the [Explore YSQL](/preview/quick-start/explore/ysql/) tutorial and then go to the `http://:7000/tablet-servers` page of the yb-master Admin UI to confirm that tablet peers and their leaders are placed evenly across all three zones for both user data and system data. - -![mz-ybtserver](/images/deploy/kubernetes/aws-multizone-ybtserver.png) - -## 6. Connect using external clients - -To connect an external program, get the load balancer `EXTERNAL-IP` address of the `yb-tserver-service` service and connect using port 5433 for YSQL or port 9042 for YCQL, as follows: - -```sh -$ kubectl get services --namespace yb-demo -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -... -yb-demo-us-east-1a yb-tserver-service LoadBalancer 10.100.97.195 ad37e06fb67de11ea87920e8fdeea06a-238172614.us-east-1.elb.amazonaws.com 6379:30334/TCP,9042:31406/TCP,5433:30024/TCP 5m12s -... -``` diff --git a/docs/content/preview/deploy/kubernetes/multi-zone/gke/helm-chart.md b/docs/content/preview/deploy/kubernetes/multi-zone/gke/helm-chart.md deleted file mode 100644 index d51d9069594f..000000000000 --- a/docs/content/preview/deploy/kubernetes/multi-zone/gke/helm-chart.md +++ /dev/null @@ -1,375 +0,0 @@ ---- -title: Deploy on Google Kubernetes Engine (GKE) using Helm Chart -headerTitle: Google Kubernetes Engine (GKE) -linkTitle: Google Kubernetes Engine (GKE) -description: Deploy a multi-zonal or regional Google Kubernetes Engine (GKE) using Helm Chart. 
-menu: - preview: - parent: deploy-kubernetes-mz - name: Google Kubernetes Engine - identifier: k8s-mz-gke-1 - weight: 628 -aliases: - - /preview/deploy/kubernetes/multi-zone/gke -type: docs ---- - - - -## Prerequisites - -You must have a [multi-zonal](https://cloud.google.com/kubernetes-engine/docs/concepts/types-of-clusters#multi-zonal_clusters) or [regional](https://cloud.google.com/kubernetes-engine/docs/concepts/types-of-clusters#regional_clusters) GKE cluster that has Helm configured. If you have not installed the Helm client (`helm`), see [Installing Helm](https://helm.sh/docs/intro/install/). - -The YugabyteDB Helm Chart has been tested with the following software versions: - -- GKE running Kubernetes 1.18 (or later) with nodes such that a total of 12 CPU cores and 45 GB RAM can be allocated to YugabyteDB. This can be three nodes with 4 CPU core and 15 GB RAM allocated to YugabyteDB. `n1-standard-8` is the minimum instance type that meets these criteria. -- Helm 3.4 or later -- YugabyteDB docker image (`yugabytedb/yugabyte`) 2.1.0 or later -- For optimal performance, ensure you've set the appropriate [system limits using `ulimit`](../../../../manual-deployment/system-config/#set-ulimits) on each node in your Kubernetes cluster. - -The following steps show how to meet these prerequisites. - -- Download and install the [Google Cloud SDK](https://cloud.google.com/sdk/downloads/). - -- Configure defaults for `gcloud` - -Set the project ID as `yugabyte`. You can change this as per your need. - -```sh -$ gcloud config set project yugabyte -``` - -- Install `kubectl` - -After installing the Google Cloud SDK, install the `kubectl` command line tool by running the following command. - -```sh -$ gcloud components install kubectl -``` - -Note that GKE is usually 2 or 3 major releases behind the upstream/OSS Kubernetes release. This means you have to make sure that you have the latest `kubectl` version that is compatible across different Kubernetes distributions if that's what you intend to. - -- Ensure `helm` is installed - -First, check to see if Helm is installed by using the Helm version command. - -```sh -$ helm version -``` - -You should see something similar to the following output. Note that the `tiller` server side component has been removed in Helm 3. - -```output -version.BuildInfo{Version:"v3.0.3", GitCommit:"ac925eb7279f4a6955df663a0128044a8a6b7593", GitTreeState:"clean", GoVersion:"go1.13.6"} -``` - -## 1. Create a GKE cluster - -### Create regional cluster - -Following command creates a 3-node cluster with 1 node each in the us-central1-a, us-central1-b and us-central1-c zones. - -```sh -$ gcloud container clusters create my-regional-cluster \ - --machine-type=n1-standard-8 \ - --num-nodes 1 \ - --region us-central1 \ - --node-locations us-central1-a,us-central1-b,us-central1-c -``` - -```output -... -NAME LOCATION MASTER_VERSION MASTER_IP MACHINE_TYPE NODE_VERSION NUM_NODES STATUS -my-regional-cluster us-central1 1.14.10-gke.17 35.226.36.261 n1-standard-8 1.14.10-gke.17 3 RUNNING -``` - -As stated in the Prerequisites section, the default configuration in the YugabyteDB Helm Chart requires Kubernetes nodes to have a total of 12 CPU cores and 45 GB RAM allocated to YugabyteDB. This can be three nodes with 4 CPU cores and 15 GB RAM allocated to YugabyteDB. The smallest Google Cloud machine type that meets this requirement is `n1-standard-8` which has 8 CPU cores and 30 GB RAM. 
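Optionally, you can verify which zones the regional cluster spans before continuing. This is a sketch that assumes the cluster name and region used in the preceding command:

```sh
# Show the node locations (zones) of the regional cluster.
gcloud container clusters describe my-regional-cluster \
  --region us-central1 \
  --format="value(locations)"
```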
- -### Create a storage class - -We need to specify `WaitForFirstConsumer` mode for the volumeBindingMode so that volumes will be provisioned according to pods' zone affinities. - -Copy the contents below to a file named `storage.yaml`. - -```yaml -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: yb-storage -provisioner: kubernetes.io/gce-pd -allowVolumeExpansion: true -volumeBindingMode: WaitForFirstConsumer -parameters: - type: pd-ssd - fsType: xfs -``` - -Apply the above configuration to your cluster. - -```sh -$ kubectl apply -f storage.yaml -``` - -## 2. Create a YugabyteDB cluster - -### Add charts repository - -To add the YugabyteDB charts repository, run the following command. - -```sh -$ helm repo add yugabytedb https://charts.yugabyte.com -``` - -Make sure that you have the latest updates to the repository by running the following command. - -```sh -$ helm repo update -``` - -Validate that you have the updated Chart version. - -```sh -$ helm search repo yugabytedb/yugabyte --version {{}} -``` - -```output -NAME CHART VERSION APP VERSION DESCRIPTION -yugabytedb/yugabyte {{}} {{}} YugabyteDB is the high-performance distributed ... -``` - -### Create override files - -Copy the contents below to a file named `overrides-us-central1-a.yaml`. - -```yaml -isMultiAz: True - -AZ: us-central1-a - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-central1-a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-c.svc.cluster.local:7100" - -storage: - master: - storageClass: "yb-storage" - tserver: - storageClass: "yb-storage" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-a" - tserver: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-a" -``` - -Copy the contents below to a file named `overrides-us-central1-b.yaml`. - -```yaml -isMultiAz: True - -AZ: us-central1-b - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-central1-a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-c.svc.cluster.local:7100" - -storage: - master: - storageClass: "yb-storage" - tserver: - storageClass: "yb-storage" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-b" - tserver: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-b" -``` - -Copy the contents below to a file named `overrides-us-central1-c.yaml`. - -```yaml -isMultiAz: True - -AZ: us-central1-c - -masterAddresses: "yb-master-0.yb-masters.yb-demo-us-central1-a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-c.svc.cluster.local:7100" - -storage: - master: - storageClass: "yb-storage" - tserver: - storageClass: "yb-storage" - -replicas: - master: 1 - tserver: 1 - totalMasters: 3 - -gflags: - master: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-c" - tserver: - placement_cloud: "gke" - placement_region: "us-central1" - placement_zone: "us-central1-c" -``` - -### Install YugabyteDB - -Install YugabyteDB in the Kubernetes cluster using the commands below. - -For Helm, you have to first create the 3 namespaces. 
- -```sh -$ kubectl create namespace yb-demo-us-central1-a -$ kubectl create namespace yb-demo-us-central1-b -$ kubectl create namespace yb-demo-us-central1-c -``` - -Now create the overall YugabyteDB cluster in such a way that one third of the nodes are hosted in each zone. - -```sh -$ helm install yb-demo-us-central1-a yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-central1-a \ - -f overrides-us-central1-a.yaml --wait -``` - -```sh -$ helm install yb-demo-us-central1-b yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-central1-b \ - -f overrides-us-central1-b.yaml --wait -``` - -```sh -$ helm install yb-demo-us-central1-c yugabytedb/yugabyte \ - --version {{}} \ - --namespace yb-demo-us-central1-c \ - -f overrides-us-central1-c.yaml --wait -``` - -## 3. Check the cluster status - -You can check the status of the cluster using various commands noted below. - -Check the pods. - -```sh -$ kubectl get pods --all-namespaces -``` - -```output -NAMESPACE NAME READY STATUS RESTARTS AGE -... -yb-demo-us-central1-a yb-master-0 2/2 Running 0 6m54s -yb-demo-us-central1-a yb-tserver-0 2/2 Running 0 6m55s -yb-demo-us-central1-b yb-master-0 2/2 Running 0 3m56s -yb-demo-us-central1-b yb-tserver-0 2/2 Running 0 3m57s -yb-demo-us-central1-c yb-master-0 2/2 Running 0 100s -yb-demo-us-central1-c yb-tserver-0 2/2 Running 0 100s -``` - -Check the services. - -```sh -$ kubectl get services --all-namespaces -``` - -```output -NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -... -yb-demo-us-central1-a yb-master-ui LoadBalancer 10.27.249.152 34.71.83.45 7000:31927/TCP 9m33s -yb-demo-us-central1-a yb-masters ClusterIP None 7100/TCP,7000/TCP 9m33s -yb-demo-us-central1-a yb-tserver-service LoadBalancer 10.27.255.103 34.71.106.168 6379:31373/TCP,9042:32627/TCP,5433:30983/TCP 9m33s -yb-demo-us-central1-a yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 9m33s -yb-demo-us-central1-b yb-master-ui LoadBalancer 10.27.240.40 35.188.198.123 7000:32217/TCP 6m35s -yb-demo-us-central1-b yb-masters ClusterIP None 7100/TCP,7000/TCP 6m35s -yb-demo-us-central1-b yb-tserver-service LoadBalancer 10.27.255.60 34.71.140.1 6379:30036/TCP,9042:31514/TCP,5433:31103/TCP 6m35s -yb-demo-us-central1-b yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 6m35s -yb-demo-us-central1-c yb-master-ui LoadBalancer 10.27.247.234 34.68.203.224 7000:31090/TCP 4m18s -yb-demo-us-central1-c yb-masters ClusterIP None 7100/TCP,7000/TCP 4m18s -yb-demo-us-central1-c yb-tserver-service LoadBalancer 10.27.243.195 35.223.214.205 6379:31689/TCP,9042:31639/TCP,5433:32685/TCP 4m18s -yb-demo-us-central1-c yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 4m18s -``` - -Access the yb-master Admin UI for the cluster at `http://:7000` where `external-ip` refers to one of the `yb-master-ui` services. Note that you can use any of the above three services for this purpose since all of them will show the same cluster metadata. - -![mz-ybmaster](/images/deploy/kubernetes/gke-multizone-ybmaster.png) - -## 4. Configure zone-aware replica placement - -Default replica placement policy treats every yb-tserver as equal irrespective of its `placement_*` setting. Go to `http://:7000/cluster-config` to confirm that the default configuration is still in effect. 
- -![before-zoneaware](/images/deploy/kubernetes/gke-aws-multizone-before-zoneaware.png) - -To make the replica placement zone-aware, so that one replica is placed in each zone, run the following command: - -```sh -$ kubectl exec -it -n yb-demo-us-central1-a yb-master-0 -- bash \ --c "/home/yugabyte/master/bin/yb-admin --master_addresses yb-master-0.yb-masters.yb-demo-us-central1-a.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-b.svc.cluster.local:7100,yb-master-0.yb-masters.yb-demo-us-central1-c.svc.cluster.local:7100 modify_placement_info gke.us-central1.us-central1-a,gke.us-central1.us-central1-b,gke.us-central1.us-central1-c 3" -``` - -To see the new configuration, go to `http://:7000/cluster-config`. - -![after-zoneaware](/images/deploy/kubernetes/gke-multizone-after-zoneaware.png) - -## 5. Connect using YugabyteDB shells - -To connect and use the YSQL Shell (ysqlsh), run the following command: - -```sh -$ kubectl exec -n yb-demo-us-central1-a -it yb-tserver-0 -- ysqlsh \ - -h yb-tserver-0.yb-tservers.yb-demo-us-central1-a -``` - -To open the YCQL Shell (ycqlsh), run the following command: - -```sh -$ kubectl exec -n yb-demo-us-central1-a -it yb-tserver-0 -- ycqlsh \ -yb-tserver-0.yb-tservers.yb-demo-us-central1-a -``` - -You can follow the [Explore YSQL](/preview/quick-start/explore/ysql/) tutorial and then go to the `http://:7000/tablet-servers` page of the yb-master Admin UI to confirm that tablet peers and their leaders are placed evenly across all three zones for both user data and system data. - -![mz-ybtserver](/images/deploy/kubernetes/gke-multizone-ybtserver.png) - -## 6. Connect using external clients - -To connect an external program, get the load balancer `EXTERNAL-IP` address of the `yb-tserver-service` service and connect using port 5433 for YSQL or port 9042 for YCQL, as follows: - -```sh -$ kubectl get services --namespace yb-demo -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -... -yb-tserver-service LoadBalancer 10.98.36.163 35.225.153.214 6379:30929/TCP,9042:30975/TCP,5433:30048/TCP 10s -... -``` diff --git a/docs/content/preview/deploy/kubernetes/single-zone/_index.md b/docs/content/preview/deploy/kubernetes/single-zone/_index.md deleted file mode 100644 index dafd8b36f7b7..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/_index.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Deploy YugabyteDB on a single-zone Kubernetes cluster -headerTitle: Deploy on single-zone Kubernetes -linkTitle: Single-zone -description: Deploy on single-zone Kubernetes -headcontent: Deploy YugabyteDB on a single-zone Kubernetes cluster -menu: - preview: - identifier: deploy-kubernetes-sz - parent: deploy-kubernetes - weight: 621 -type: indexpage ---- - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/deploy/kubernetes/single-zone/aks/helm-chart.md b/docs/content/preview/deploy/kubernetes/single-zone/aks/helm-chart.md deleted file mode 100644 index c596df08df6e..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/aks/helm-chart.md +++ /dev/null @@ -1,290 +0,0 @@ ---- -title: Deploy on Azure Kubernetes Service (AKS) using Helm chart -headerTitle: Azure Kubernetes Service (AKS) -linkTitle: Azure Kubernetes Service (AKS) -description: Use Helm chart to deploy a single-zone YugabyteDB cluster on Azure Kubernetes Service (AKS).
-menu: - preview: - parent: deploy-kubernetes-sz - name: Azure Kubernetes Service - identifier: k8s-aks-1 - weight: 624 -aliases: - - /preview/deploy/kubernetes/aks/ - - /preview/deploy/kubernetes/aks/helm-chart/ - - /preview/deploy/kubernetes/single-zone/aks/ -type: docs ---- - - - - -You can deploy a YugabyteDB cluster on Azure Kubernetes Service (AKS). - -Microsoft's [Azure Kubernetes Service](https://azure.microsoft.com/en-au/services/kubernetes-service/) provides a fully-managed Kubernetes service able to host their applications on containers in the cloud. - -## Prerequisites - -Before deploying YugabyteDB on AKS, verify that the following components are installed and configured: - -- `kubectl` - - For more information, see [Install and Set Up kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/). - - [Kubernetes API](https://kubernetes.io/docs/reference/kubernetes-api/) -- Helm 3.4 or later - - For more information, see [Installing Helm](https://helm.sh/docs/intro/install/). - -- [Microsoft Azure](https://azure.microsoft.com/en-au/pricing/purchase-options/pay-as-you-go/) account with Pay As You Go enabled. - -## Deploy YugabyteDB on an Azure Kubernetes cluster - -The following examples are based on using macOS. - -### Step 1: Install the Azure CLI - -To install the Azure CLI on your local operating system, follow the instructions provided in [Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest). - -On macOS, you can run the following Homebrew command to install Azure CLI: - -```sh -brew install azure-cli -``` - -After the Azure CLI is installed, use the following command to log in at the command line: - -```sh -az login -``` - -After entering this command, a browser window appears for you to select the Azure credentials you are using. - -You are logged into Microsoft Azure and can use the Azure CLI with your subscription. -For more information, see [Azure CLI documentation](https://docs.microsoft.com/en-us/cli/azure/?view=azure-cli-latest). - -### Step 2: Create a Resource Group - -To create a resource group, you need to choose the location to host it. 
Run the following command to retrieve a list of the available locations: - -```sh -az account list-locations -``` - -For the purposes of this example, the location is “West US”: - -```output.json -{ - "displayName": "West US", - "id": "/subscriptions/53f36dd9-85d8-4690-b45b-92733d97e6c3/locations/westus", - "latitude": "37.783", - "longitude": "-122.417", - "name": "westus", - "subscriptionId": null -}, -``` - -Create the resource group by running the following command, specifying the location: - -```sh -az group create --name yugabytedbRG --location westus -``` - -```output.json -{ - "id": "/subscriptions/53f36dd9-85d8-4690-b45b-92733d97e6c3/resourceGroups/yugabytedbRG", - "location": "westus", - "managedBy": null, - "name": "yugabytedbRG", - "properties": { - "provisioningState": "Succeeded" - }, - "tags": null, - "type": "Microsoft.Resources/resourceGroups" -} -``` - -You should be able to see the yugabytedbRG resource group in the Azure portal by clicking **Resource groups**, as per the following illustration: - -![Resource Groups at Microsoft Azure Portal](/images/deploy/kubernetes/aks/aks-resource-groups.png) - -### Step 3: Create the Kubernetes cluster - -Create a Kubernetes cluster by running the following command: - -```sh -az aks create \ ---resource-group yugabytedbRG \ ---name yugabytedbAKSCluster \ ---node-count 3 \ ---node-vm-size Standard_D4_v3 \ ---enable-addons monitoring \ ---generate-ssh-keys -``` - -Note that because you have not [specified any zones](https://docs.microsoft.com/en-us/azure/aks/availability-zones) in the preceding command, the AKS control plane components for the cluster will be deployed in a single zone. - -The `--generate-ssh-keys` argument auto-generates SSH public and private key files to be stored in the `~/.ssh` directory. - -Expect to see the following output: - -```output -Finished service principal creation[###################] 100.0000% - - Running .. -``` - -`yugabytedbAKSCluster` should be available in the Azure UI, as per the following illustration: - -![yugabytedbRG](/images/deploy/kubernetes/aks/aks-resource-group-cluster.png) - -To create the cluster and use your own SSH keys, run the following command: - -```sh -ssh-keygen -t rsa -b 2048 -``` - -Follow the prompts to create the ` id_rsa ` and `id_rsa.pub` files, and record their location. 
Run the following command: - -```sh -az aks create \ ---resource-group yugabytedbRG \ ---name yugabytedbAKSCluster \ ---node-count 3 \ ---node-vm-size Standard_D4_v3 \ ---enable-addons monitoring \ ---ssh-key-value id_rsa.pub -``` - -After the cluster is installed, point `kubectl` to the cluster by running the following command: - -```sh -az aks get-credentials --resource-group yugabytedbRG --name yugabytedbAKSCluster -``` - -You should see an output similar to the following: - -```output -Merged "yugabytedbAKSCluster" as current context in /Users/yugabyte-user/.kube/config -``` - -If you generated your own SSH keys, point `kubectl` to the cluster by running the following command instead: - -```sh -az aks get-credentials --resource-group yugabytedbRG --name yugabytedbAKSCluster -ssh-key-file id_rsa -``` - -Verify that the cluster nodes are running using the following command: - -```sh -kubectl get nodes -``` - -You should see an output similar to the following: - -![alt_text](/images/deploy/kubernetes/aks/aks-kubectl-get-nodes.png) - -You can also view the details of the cluster in the Kubernetes dashboard by running the following two commands: - -```sh -kubectl create clusterrolebinding yb-kubernetes-dashboard --clusterrole=cluster-admin --serviceaccount=kube-system:kubernetes-dashboard --user=clusterUser -``` - -```sh -az aks browse --resource-group yugabytedbRG --name yugabytedbAKSCluster -``` - -A browser window appears where you can view the Kubernetes dashboard, as per the following illustration: - -![Kubernetes Dashboard](/images/deploy/kubernetes/aks/aks-kubernetes-dashboard.png) - -### Step 4: Install YugabyteDB using Helm chart - -You need to perform a number of steps to deploy YugabyteDB using Helm chart: - -1. Add the YugabyteDB `charts` repository by running the following commands: - - ```sh - helm repo add yugabytedb https://charts.yugabyte.com - ``` - - Get the latest update from the `charts` repository by running the following `helm` command: - - ```sh - helm repo update - ``` - - ```output - Hang tight while we grab the latest from your chart repositories... - ...Successfully got an update from the "yugabytedb" chart repository - ``` - - ```sh - helm search repo yugabytedb/yugabyte --version {{}} - ``` - - ```output - NAME CHART VERSION APP VERSION DESCRIPTION - yugabytedb/yugabyte {{}} {{}} YugabyteDB is the high-performance distributed ... - ``` - -1. To create the `yb-demo` namespace, run the following command. - - ```sh - kubectl create namespace yb-demo - ``` - - The following message should appear: - - ```output - namespace/yb-demo created - ``` - -1. Install YugabyteDB in the `yb-demo` namespace by running the following commands to specify settings for resource constrained environments: - - ```sh - helm install yb-demo -n yb-demo yugabytedb/yugabyte \ - --version {{}} \ - --set storage.master.count=1 \ - --set storage.tserver.count=1 \ - --set storage.master.storageClass=default \ - --set storage.tserver.storageClass=default \ - --set resource.master.requests.cpu=1 \ - --set resource.master.requests.memory=1Gi \ - --set resource.tserver.requests.cpu=1 \ - --set resource.tserver.requests.memory=1Gi \ - --set resource.master.limits.cpu=1 \ - --set resource.master.limits.memory=1Gi \ - --set resource.tserver.limits.cpu=1 \ - --set resource.tserver.limits.memory=1Gi \ - --timeout=15m - ``` - - Depending on your resources, it may take some time to get everything installed, deployed, and configured. 
- - After you see a `success` message, you can verify that the YugabyteDB pods are running by using the following command: - - ```sh - kubectl get pods --namespace yb-demo - ``` - - ![Verify pods are running](/images/deploy/kubernetes/aks/aks-verify-pods-running.png) - - To access the YugabyteDB Admin UI, run the following command to locate the **External IP** entry associated with `yb-master-ui` and port `7000`: - - ```sh - kubectl get services --namespace yb-demo - ``` - - Navigate to `http://:7000`, replacing `` with your external IP address. You should see the following: - - ![YugabyteDB Admin UI](/images/deploy/kubernetes/aks/aks-admin-ui.png) diff --git a/docs/content/preview/deploy/kubernetes/single-zone/aks/statefulset-yaml.md b/docs/content/preview/deploy/kubernetes/single-zone/aks/statefulset-yaml.md deleted file mode 100644 index 81e035bb13fe..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/aks/statefulset-yaml.md +++ /dev/null @@ -1,199 +0,0 @@ ---- -title: Deploy on Azure Kubernetes Service (AKS) using StatefulSet YAML -headerTitle: Azure Kubernetes Service (AKS) -linkTitle: Azure Kubernetes Service (AKS) -description: Use StatefulSet YAML to deploy a single-zone Kubernetes cluster on Azure Kubernetes Service (AKS). -menu: - preview: - parent: deploy-kubernetes-sz - name: Azure Kubernetes Service - identifier: k8s-aks-2 - weight: 624 -aliases: - - /preview/deploy/kubernetes/aks/statefulset-yaml/ -type: docs ---- - - - - - -## Prerequisites - -Before deploying YugabyteDB on AKS, perform the following: - -- Connect to the Azure Cloud Shell. See [Azure bash cloud shell](https://shell.azure.com/bash). - -- Register the necessary Azure service providers by running the following: - - ```sh - az provider register -n Microsoft.Network - az provider register -n Microsoft.Storage - az provider register -n Microsoft.Compute - az provider register -n Microsoft.ContainerService - ``` - -- Execute the following command to configure a default location. Remember to replace `eastus` with an appropriate Azure location (region) of your choice that supports AKS clusters: - - ```sh - az configure --defaults location=eastus - ``` - -## Create an Azure cluster - -Create an Azure resource group, a logical group in which Azure resources are deployed and managed. - -Execute the following command to specify a default location or pass the location parameter to create the resource: - -```sh -az group create --name yb-eastus-resource -``` - - The resources you create for the AKS cluster will live in this Azure resource. 
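To confirm that the resource group was created, you can optionally run the following check; it assumes the `yb-eastus-resource` name used above:

```sh
# Show the resource group and its provisioning state.
az group show --name yb-eastus-resource --output table
```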
- -Create a three-node AKS cluster by running the following command: - -```sh -az aks create --resource-group yb-eastus-resource --name yb-aks-cluster --node-count 3 --generate-ssh-keys -``` - -Configure `kubectl` to work with this cluster, as follows: - -```sh -az aks get-credentials --resource-group yb-eastus-resource --name yb-aks-cluster -``` - -Verify the cluster by running the following command: - -```sh -kubectl get nodes -``` - -```output -NAME STATUS ROLES AGE VERSION -aks-nodepool1-25019584-0 Ready agent 4h v1.7.9 -aks-nodepool1-25019584-1 Ready agent 4h v1.7.9 -aks-nodepool1-25019584-2 Ready agent 4h v1.7.9 -``` - -## Create a YugabyteDB cluster - -Create a YugabyteDB cluster by running the following command: - -```sh -curl -s "https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/cloud/kubernetes/yugabyte-statefulset.yaml" | sed "s/storageClassName: standard/storageClassName: default/g" | kubectl create -f - -``` - -```output -service "yb-masters" created -statefulset "yb-master" created -service "yb-tservers" created -statefulset "yb-tserver" created -``` - -## Check the cluster - -Check which pods are running using the following command: - -```sh -kubectl get pods -``` - -```output -NAME READY STATUS RESTARTS AGE -yb-master-0 1/1 Running 0 3m -yb-master-1 1/1 Running 0 3m -yb-master-2 1/1 Running 0 3m -yb-tserver-0 1/1 Running 0 3m -yb-tserver-1 1/1 Running 0 3m -yb-tserver-2 1/1 Running 0 3m -``` - -View the persistent volumes, as follows: - -```sh -kubectl get persistentvolumes -``` - -```output -NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE -pvc-849395f7-36f2-11e8-9445-0a58ac1f27f1 1Gi RWO Delete Bound default/datadir-yb-master-0 default 12m -pvc-8495d8cd-36f2-11e8-9445-0a58ac1f27f1 1Gi RWO Delete Bound default/datadir-yb-master-1 default 12m -pvc-8498b836-36f2-11e8-9445-0a58ac1f27f1 1Gi RWO Delete Bound default/datadir-yb-master-2 default 12m -pvc-84abba1a-36f2-11e8-9445-0a58ac1f27f1 1Gi RWO Delete Bound default/datadir-yb-tserver-0 default 12m -pvc-84af3484-36f2-11e8-9445-0a58ac1f27f1 1Gi RWO Delete Bound default/datadir-yb-tserver-1 default 12m -pvc-84b35d19-36f2-11e8-9445-0a58ac1f27f1 1Gi RWO Delete Bound default/datadir-yb-tserver-2 default 12m -``` - -You can view all the services by running the following command: - -```sh -kubectl get services -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -kubernetes ClusterIP XX.XX.XX.X 443/TCP 23m -yb-masters ClusterIP None 7000/TCP,7100/TCP 17m -yb-tservers ClusterIP None 9000/TCP,9100/TCP,9042/TCP,6379/TCP 14m -``` - -## Connect to the cluster - -To open the YCQL shell (ycqlsh), run the following command: - -```sh -kubectl exec -it yb-tserver-0 -- ycqlsh yb-tserver-0 -``` - -```output -Connected to local cluster at 127.0.0.1:9042. -[ycqlsh 5.0.1 | Cassandra 3.9-SNAPSHOT | CQL spec 3.4.2 | Native protocol v4] -Use HELP for help. 
-ycqlsh> DESCRIBE KEYSPACES; - -system_schema system_auth system -``` - -## Destroy the YugabyteDB cluster - -You can destroy the YugabyteDB cluster by running the following command: - -```sh -kubectl delete -f https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/cloud/kubernetes/yugabyte-statefulset.yaml -``` - -```output -service "yb-masters" deleted -statefulset "yb-master" deleted -service "yb-tservers" deleted -statefulset "yb-tserver" deleted -``` - -To destroy the persistent volume claims (and lose all the data), run the following commands: - -```sh -$ kubectl delete pvc -l app=yb-master -$ kubectl delete pvc -l app=yb-tserver -``` - -## Destroy the AKS cluster - -To destroy the resource you created for the AKS cluster, run the following: - -```sh -az group delete --name yb-eastus-resource -``` diff --git a/docs/content/preview/deploy/kubernetes/single-zone/eks/helm-chart.md b/docs/content/preview/deploy/kubernetes/single-zone/eks/helm-chart.md deleted file mode 100644 index 4cb36328c8d7..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/eks/helm-chart.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: Deploy single zone on EKS using Helm Chart -linkTitle: Amazon Elastic Kubernetes Service (EKS) -description: Use Helm Chart to deploy a single-zone Kubernetes cluster on Amazon Elastic Kubernetes Service (EKS). -menu: - preview: - parent: deploy-kubernetes-sz - name: Amazon EKS - identifier: k8s-eks-1 - weight: 622 -aliases: - - /preview/deploy/kubernetes/eks/ - - /preview/deploy/kubernetes/eks/helm-chart/ - - /preview/deploy/kubernetes/single-zone/eks/ -type: docs ---- - - - - -Amazon EKS runs multi-zone Kubernetes clusters by default and has no support for single-zone deployments. As described in [Amazon EKS Features](https://aws.amazon.com/eks/features/), the managed control plane runs in multiple availability zones by default to protect cluster administration against zone failures. Similarly, the worker nodes are automatically placed in multiple availability zones of the chosen region to protect the cluster itself from zone failures. - -Refer to the [Multi-zone Amazon EKS](../../../multi-zone/eks/helm-chart/) instructions for getting started with YugabyteDB on Amazon EKS. diff --git a/docs/content/preview/deploy/kubernetes/single-zone/gke/helm-chart.md b/docs/content/preview/deploy/kubernetes/single-zone/gke/helm-chart.md deleted file mode 100644 index d699429d4dad..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/gke/helm-chart.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -title: Use Helm Chart to deploy on Google Kubernetes Engine (GKE) -headerTitle: Google Kubernetes Engine (GKE) -linkTitle: Google Kubernetes Engine (GKE) -description: Use Helm Chart to deploy a single-zone YugabyteDB cluster on Google Kubernetes Engine (GKE). -menu: - preview: - parent: deploy-kubernetes-sz - name: Google Kubernetes Engine - identifier: k8s-gke-1 - weight: 623 -aliases: - - /preview/deploy/kubernetes/gke/helm-chart/ - - /preview/deploy/kubernetes/single-zone/gke/ -type: docs ---- - - - -## Prerequisites - -You must have a Google Kubernetes Engine (GKE) cluster that has Helm configured. If you have not installed the Helm client (`helm`), see [Installing Helm](https://helm.sh/docs/intro/install/). - -The YugabyteDB Helm chart has been tested with the following software versions: - -- GKE running Kubernetes 1.20 or later. 
The Helm chart you use to install YugabyteDB creates three YB-Master and three YB-TServers, each with 2 CPU cores, for a total of 12 CPU cores. This means you need a Kubernetes cluster with more than 12 CPU cores. If the cluster contains three nodes, then each node should have more than 4 cores. - -- Helm 3.4 or later. -- For optimal performance, ensure you set the appropriate [system limits using `ulimit`](../../../../manual-deployment/system-config/#set-ulimits) on each node in your Kubernetes cluster. - -The following steps show how to meet these prerequisites: - -- Download and install the [Google Cloud SDK](https://cloud.google.com/sdk/downloads/). - -- Configure defaults for Google Cloud. - - Execute the following command to set the project ID to `yugabyte`. You can change this as needed. - - ```sh - gcloud config set project yugabyte - ``` - - Execute the following command to set the default compute zone to `us-west1-b`. You can change this as needed. - - ```sh - gcloud config set compute/zone us-west1-b - ``` - -- Install `kubectl`. Refer to kubectl installation instructions for your [operating system](https://kubernetes.io/docs/tasks/tools/). - - Note that GKE is usually two or three major releases behind the upstream or OSS Kubernetes release. This means you have to make sure that you have the latest kubectl version that is compatible across different Kubernetes distributions. - -- Ensure that `helm` is installed. - - First, check the Helm version, as follows: - - ```sh - helm version - ``` - - Expect to see the output similar to the following. Note that the `tiller` server-side component has been removed in Helm 3. - - ```output - version.BuildInfo{Version:"v3.0.3", GitCommit:"ac925eb7279f4a6955df663a0128044a8a6b7593", GitTreeState:"clean", GoVersion:"go1.13.6"} - ``` - -## Create a GKE cluster - -Create a private Kubernetes cluster by running the following command. - -```sh -gcloud container clusters create cluster_name --enable-private-nodes --machine-type=n1-standard-8 -``` - -Note that you must set up Cloud NAT for a private Kubernetes cluster in Google Cloud to ensure that your cluster can access the internet while its nodes do not have public IP addresses. Refer to [Configuring Private Google Access and Cloud NAT in Google Cloud Platform (GCP)](https://kloudkraft.medium.com/configuring-private-google-access-and-cloud-nat-in-google-cloud-platform-gcp-3c4406b590b3). - -As stated in [Prerequisites](#prerequisites), the default configuration in the YugabyteDB Helm chart requires Kubernetes nodes to have a total of 12 CPU cores and 45 GB RAM allocated to YugabyteDB. This can be three nodes with 4 CPU cores and 15 GB RAM allocated to YugabyteDB. The smallest Google Cloud machine type that meets this requirement is `n1-standard-8` which has 8 CPU cores and 30GB RAM. - -## Create a YugabyteDB cluster - -Creating a YugabyteDB cluster involves a number of steps. 
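Before adding the charts repository, confirm that `kubectl` is pointing at the GKE cluster you just created. `gcloud container clusters create` normally updates your kubeconfig automatically, so the following is only a minimal sketch for when you are working from a different shell or machine; it assumes the `cluster_name` placeholder used above and the default compute zone configured earlier:

```sh
# Fetch credentials for the new cluster and point kubectl at it.
# Replace cluster_name with the name you used when creating the cluster.
gcloud container clusters get-credentials cluster_name --zone us-west1-b

# Verify that the worker nodes are visible.
kubectl get nodes
```

The nodes should show a `Ready` status before you proceed.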
- -### Add charts repository - -To add the YugabyteDB charts repository, run the following command: - -```sh -helm repo add yugabytedb https://charts.yugabyte.com -``` - -### Fetch updates from the repository - -Make sure that you have the latest updates to the repository by running the following command: - -```sh -helm repo update -``` - -### Validate the Chart version - -Execute the following command: - -```sh -helm search repo yugabytedb/yugabyte --version {{}} -``` - -Expect the following output: - -```output -NAME CHART VERSION APP VERSION DESCRIPTION -yugabytedb/yugabyte {{}} {{}} YugabyteDB is the high-performance distributed ... -``` - -### Install YugabyteDB - -Run the following commands to create a namespace and then install YugabyteDB: - -```sh -kubectl create namespace yb-demo -helm install yb-demo yugabytedb/yugabyte --version {{}} --namespace yb-demo --wait -``` - -## Check the cluster status - -You can check the status of the cluster using the following command: - -```sh -helm status yb-demo -n yb-demo -``` - -```output -NAME: yb-demo -LAST DEPLOYED: Thu Feb 13 13:29:13 2020 -NAMESPACE: yb-demo -STATUS: deployed -REVISION: 1 -TEST SUITE: None -NOTES: -1. Get YugabyteDB Pods by running this command: - kubectl --namespace yb-demo get pods - -2. Get list of YugabyteDB services that are running: - kubectl --namespace yb-demo get services - -3. Get information about the load balancer services: - kubectl get svc --namespace yb-demo - -4. Connect to one of the tablet server: - kubectl exec --namespace yb-demo -it yb-tserver-0 -- bash - -5. Run YSQL shell from inside of a tablet server: - kubectl exec --namespace yb-demo -it yb-tserver-0 -- ysqlsh -h yb-tserver-0.yb-tservers.yb-demo - -6. Cleanup YugabyteDB Pods - helm delete yb-demo --purge - NOTE: You need to manually delete the persistent volume - kubectl delete pvc --namespace yb-demo -l app=yb-master - kubectl delete pvc --namespace yb-demo -l app=yb-tserver -``` - -Check the pods, as follows: - -```sh -kubectl get pods --namespace yb-demo -``` - -```output -NAME READY STATUS RESTARTS AGE -yb-master-0 1/1 Running 0 4m -yb-master-1 1/1 Running 0 4m -yb-master-2 1/1 Running 0 4m -yb-tserver-0 1/1 Running 0 4m -yb-tserver-1 1/1 Running 0 4m -yb-tserver-2 1/1 Running 0 4m -``` - -Check the services, as follows: - -```sh -kubectl get services --namespace yb-demo -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -yb-master-ui LoadBalancer 10.109.39.242 35.225.153.213 7000:31920/TCP 10s -yb-masters ClusterIP None 7100/TCP,7000/TCP 10s -yb-tserver-service LoadBalancer 10.98.36.163 35.225.153.214 6379:30929/TCP,9042:30975/TCP,5433:30048/TCP 10s -yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 10s -``` - -You can even check the history of the `yb-demo` deployment, as follows: - -```sh -helm history yb-demo -n yb-demo -``` - -```output -REVISION UPDATED STATUS CHART APP VERSION DESCRIPTION -1 Tue Apr 21 17:29:01 2020 deployed yugabyte-{{}} {{}} Install complete -``` - -## Connect using YugabyteDB shells - -To connect and use the YSQL Shell ysqlsh, run the following command: - -```sh -kubectl exec -n yb-demo -it yb-tserver-0 -- ysqlsh -h yb-tserver-0.yb-tservers.yb-demo -``` - -To connect and use the YCQL Shell ycqlsh, run the following command: - -```sh -kubectl exec -n yb-demo -it yb-tserver-0 -- ycqlsh yb-tserver-0.yb-tservers.yb-demo -``` - -## Connect using external clients - -To connect an external program, get the load balancer `EXTERNAL-IP` address of the `yb-tserver-service` service and 
connect using port 5433 for YSQL or port 9042 for YCQL, as follows: - -```sh -kubectl get services --namespace yb-demo -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -... -yb-tserver-service LoadBalancer 10.98.36.163 35.225.153.214 6379:30929/TCP,9042:30975/TCP,5433:30048/TCP 10s -... -``` - -## Configure cluster - -You can configure the cluster using the same commands and options that are described in [Open Source Kubernetes](../../oss/helm-chart/#configure-cluster). - -### Independent LoadBalancers - -By default, the YugabyteDB Helm chart exposes the client API endpoints, as well as YB-Master UI endpoint using two LoadBalancers. To expose the client APIs using independent LoadBalancers, you can execute the following command: - -```sh -helm install yb-demo yugabytedb/yugabyte -f https://raw.githubusercontent.com/yugabyte/charts/master/stable/yugabyte/expose-all.yaml --version {{}} --namespace yb-demo --wait -``` - -You can also bring up an internal LoadBalancer (for either YB-Master or YB-TServer services), if required. You would need to specify the [annotation](https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer) required for your cloud provider. The following command brings up an internal LoadBalancer for the YB-TServer service in Google Cloud Platform: - -```sh -helm install yugabyte -f https://raw.githubusercontent.com/yugabyte/charts/master/stable/yugabyte/expose-all.yaml --version {{}} --namespace yb-demo --name yb-demo \ - --set annotations.tserver.loadbalancer."cloud\.google\.com/load-balancer-type"=Internal --wait -``` diff --git a/docs/content/preview/deploy/kubernetes/single-zone/gke/statefulset-yaml.md b/docs/content/preview/deploy/kubernetes/single-zone/gke/statefulset-yaml.md deleted file mode 100644 index 7c1f234fc2a7..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/gke/statefulset-yaml.md +++ /dev/null @@ -1,181 +0,0 @@ ---- -title: Deploy on Google Kubernetes Engine (GKE) using YAML (remote disk) -headerTitle: Google Kubernetes Engine (GKE) -linkTitle: Google Kubernetes Engine (GKE) -description: Deploy a single-zone YugabyteDB cluster on Google Kubernetes Engine (GKE) using YAML (remote disk). -menu: - preview: - parent: deploy-kubernetes-sz - name: Google Kubernetes Engine - identifier: k8s-gke-2 - weight: 623 -aliases: - - /preview/deploy/kubernetes/gke/statefulset-yaml -type: docs ---- - - - -## Prerequisites - -Before starting deployment, perform the following: - -- Download and install the [Google Cloud SDK](https://cloud.google.com/sdk/downloads/). - - Note that if you install `gcloud` using a package manager (as opposed to downloading and installing it manually), some of the commands will not be supported. - -- Install `kubectl` command line tool by running the following command: - - ```sh - gcloud components install kubectl - ``` - -- Configure defaults for `gcloud` by setting the project ID as `yugabyte`. You can change this as needed. - - ```sh - gcloud config set project yugabyte - ``` - -- Set the default compute zone as `us-west1-b`. You can change this as needed. - - ```sh - gcloud config set compute/zone us-west1-b - ``` - -## Create a GKE cluster - -Create a private Kubernetes cluster using the following command. - -```sh -gcloud container clusters create cluster_name --enable-private-nodes -``` - -Note that you must set up Cloud NAT for a private Kubernetes cluster in Google Cloud to ensure that your cluster can access the internet while its nodes do not have public IP addresses. 
Refer to [Configuring Private Google Access and Cloud NAT in Google Cloud Platform (GCP)](https://kloudkraft.medium.com/configuring-private-google-access-and-cloud-nat-in-google-cloud-platform-gcp-3c4406b590b3). - -## Create a YugabyteDB cluster - -Create a YugabyteDB cluster by running the following command: - -```sh -kubectl create -f https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/cloud/kubernetes/yugabyte-statefulset.yaml -``` - -```output -service "yb-masters" created -statefulset "yb-master" created -service "yb-tservers" created -statefulset "yb-tserver" created -``` - -## Check the cluster - -Execute the following command to see the pods running: - -```sh -kubectl get pods -``` - -```output -NAME READY STATUS RESTARTS AGE -yb-master-0 1/1 Running 0 3m -yb-master-1 1/1 Running 0 3m -yb-master-2 1/1 Running 0 3m -yb-tserver-0 1/1 Running 0 3m -yb-tserver-1 1/1 Running 0 3m -yb-tserver-2 1/1 Running 0 3m -``` - -You can view the persistent volumes, as follows: - -```sh -kubectl get persistentvolumes -``` - -```output -NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE -pvc-f3301c41-1110-11e8-8231-42010a8a0083 1Gi RWO Delete Bound default/datadir-yb-master-0 standard 5m -pvc-f33f29b3-1110-11e8-8231-42010a8a0083 1Gi RWO Delete Bound default/datadir-yb-master-1 standard 5m -pvc-f35005b6-1110-11e8-8231-42010a8a0083 1Gi RWO Delete Bound default/datadir-yb-master-2 standard 5m -pvc-f36189ab-1110-11e8-8231-42010a8a0083 1Gi RWO Delete Bound default/datadir-yb-tserver-0 standard 5m -pvc-f366a4af-1110-11e8-8231-42010a8a0083 1Gi RWO Delete Bound default/datadir-yb-tserver-1 standard 5m -pvc-f36d2892-1110-11e8-8231-42010a8a0083 1Gi RWO Delete Bound default/datadir-yb-tserver-2 standard 5m -``` - -You can view all the services by running the following command: - -```sh -kubectl get services -``` - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -kubernetes ClusterIP XX.XX.XX.X 443/TCP 23m -yb-masters ClusterIP None 7000/TCP,7100/TCP 17m -yb-tservers ClusterIP None 9000/TCP,9100/TCP,9042/TCP,6379/TCP 14m -``` - -## Connect to the cluster - -You can connect to the YCQL API by running the following: - -```sh -kubectl exec -it yb-tserver-0 -- ycqlsh yb-tserver-0 -``` - -```output -Connected to local cluster at 127.0.0.1:9042. -[cqlsh 5.0.1 | Cassandra 3.9-SNAPSHOT | CQL spec 3.4.2 | Native protocol v4] -Use HELP for help. 
-ycqlsh> DESCRIBE KEYSPACES; - -system_schema system_auth system -``` - -## Destroy cluster - -Destroy the YugabyteDB cluster you created above by running the following: - -```sh -kubectl delete -f https://raw.githubusercontent.com/yugabyte/yugabyte-db/master/cloud/kubernetes/yugabyte-statefulset.yaml -``` - -```output -service "yb-masters" deleted -statefulset "yb-master" deleted -service "yb-tservers" deleted -statefulset "yb-tserver" deleted -``` - -To destroy the persistent volume claims and lose all the data, run the following: - -```sh -kubectl delete pvc -l app=yb-master -kubectl delete pvc -l app=yb-tserver -``` - -## Destroy the GKE cluster - -To destroy the machines you created for the `gcloud` cluster, run the following command: - -```sh -gcloud container clusters delete yugabyte -``` diff --git a/docs/content/preview/deploy/kubernetes/single-zone/oss/helm-chart.md b/docs/content/preview/deploy/kubernetes/single-zone/oss/helm-chart.md deleted file mode 100644 index 1a1006b9db3a..000000000000 --- a/docs/content/preview/deploy/kubernetes/single-zone/oss/helm-chart.md +++ /dev/null @@ -1,455 +0,0 @@ ---- -title: Deploy on OSS Kubernetes using Helm Chart -headerTitle: Open source Kubernetes -linkTitle: Open source Kubernetes -description: Deploy a YugabyteDB cluster on OSS Kubernetes using Helm Chart. -aliases: - - /preview/deploy/kubernetes/oss/ - - /preview/deploy/kubernetes/oss/helm-chart/ - - /preview/deploy/kubernetes/single-zone/oss/ - - /preview/deploy/kubernetes/helm-chart/ - - /preview/deploy/kubernetes/helm-configuration/ -menu: - preview: - parent: deploy-kubernetes-sz - name: Open Source - identifier: k8s-oss-1 - weight: 621 -type: docs ---- - - - -[Helm](https://helm.sh/) is an open source packaging tool that helps install applications and services on Kubernetes. It uses a packaging format called `charts`. A Helm chart is a package containing all resource definitions necessary to create an instance of a Kubernetes application, tool, or service in a Kubernetes cluster. - -## Prerequisites - -You must have a Kubernetes cluster that has Helm configured. If you have not installed the Helm client (`helm`), see [Install Helm](https://helm.sh/docs/intro/install/). - -The YugabyteDB Helm chart has been tested with the following software versions: - -- Kubernetes 1.20 or later with nodes such that a total of 12 CPU cores and 18 GB RAM can be allocated to YugabyteDB. This can be three nodes with 4 CPU core and 6 GB RAM allocated to YugabyteDB. -- Helm 3.4 or later. -- YugabyteDB Docker image (yugabytedb/yugabyte) 2.1.0 or later -- For optimal performance, ensure you have set the appropriate [system limits using `ulimit`](../../../../manual-deployment/system-config/#set-ulimits) on each node in your Kubernetes cluster. - -Confirm that `helm` and `kubectl` are configured correctly, as follows: - -```sh -helm version -``` - -```output -version.BuildInfo{Version:"v3.2.1", GitCommit:"fe51cd1e31e6a202cba7dead9552a6d418ded79a", GitTreeState:"clean", GoVersion:"go1.13.10"} -``` - -```sh -kubectl version -``` - -## Create cluster - -Creating a cluster includes adding a repository for charts and updating this repository, checking the version, and installing YugabyteDB. 
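Before adding the charts repository, you can optionally confirm that the cluster nodes satisfy the CPU and memory prerequisites listed above. The following is a minimal sketch using standard `kubectl` output formatting; the allocatable values you see depend on your Kubernetes distribution:

```sh
# Show the CPU and memory each node can allocate to pods.
kubectl get nodes -o custom-columns=NAME:.metadata.name,CPU:.status.allocatable.cpu,MEMORY:.status.allocatable.memory
```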
- -### Add charts repository - -To add the YugabyteDB charts repository, run the following command: - -```sh -helm repo add yugabytedb https://charts.yugabyte.com -``` - -### Fetch updates from the repository - -Make sure that you have the latest updates to the repository by running the following command: - -```sh -helm repo update -``` - -### Validate the chart version - -To check the chart version, run the following command: - -```sh -helm search repo yugabytedb/yugabyte --version {{}} -``` - -Expect output similar to the following: - -```output -NAME CHART VERSION APP VERSION DESCRIPTION -yugabytedb/yugabyte {{}} {{}} YugabyteDB is the high-performance distributed ... -``` - -### Install YugabyteDB - -Install YugabyteDB in the Kubernetes cluster using the commands described in the following sections. - -#### On multi-node Kubernetes - -Create a namespace and then install YugabyteDB, as follows: - -```sh -kubectl create namespace yb-demo - -helm install yb-demo yugabytedb/yugabyte --version {{}} --namespace yb-demo --wait -``` - -#### On Minikube - -If you are running in a resource-constrained environment or a local environment, such as Minikube, you have to change the default resource requirements. - -Create a `yb-demo` namespace, as follows: - -```sh -kubectl create namespace yb-demo - -helm install yb-demo yugabytedb/yugabyte \ ---version {{}} \ ---set resource.master.requests.cpu=0.5,resource.master.requests.memory=0.5Gi,\ -resource.tserver.requests.cpu=0.5,resource.tserver.requests.memory=0.5Gi --namespace yb-demo -``` - -Note that in Minikube, the LoadBalancers for `yb-master-ui` and `yb-tserver-service` will remain in pending state as load balancers are not available in a Minikube environment. If you would like to disable these services, pass the `enableLoadBalancer=False` flag, as follows: - -```sh -helm install yb-demo yugabytedb/yugabyte \ ---version {{}} \ ---set resource.master.requests.cpu=0.5,resource.master.requests.memory=0.5Gi,\ -resource.tserver.requests.cpu=0.5,resource.tserver.requests.memory=0.5Gi,\ -enableLoadBalancer=False --namespace yb-demo -``` - -In some environments, such as macOS, Minikube may run inside a virtual machine. Make sure to configure the VM with at least 4 CPUs and 5 GB memory so the cluster has room to start up. The following is an example command: - -```sh -minikube start --cpus 4 --memory 5120 -``` - -## Check the cluster status - -You can check the status of the cluster using the following commands: - -```sh -helm status yb-demo -n yb-demo -``` - -Expect output similar to the following: - -```output -NAME: yb-demo -LAST DEPLOYED: Thu Feb 13 13:29:13 2020 -NAMESPACE: yb-demo -STATUS: deployed -REVISION: 1 -TEST SUITE: None -NOTES: -1. Get YugabyteDB Pods by running this command: - kubectl --namespace yb-demo get pods - -2. Get list of YugabyteDB services that are running: - kubectl --namespace yb-demo get services - -3. Get information about the load balancer services: - kubectl get svc --namespace yb-demo - -4. Connect to one of the tablet server: - kubectl exec --namespace yb-demo -it yb-tserver-0 -- bash - -5. Run YSQL shell from inside of a tablet server: - kubectl exec --namespace yb-demo -it yb-tserver-0 -- ysqlsh -h yb-tserver-0.yb-tservers.yb-demo - -6. 
Cleanup YugabyteDB Pods - helm delete yb-demo --purge - NOTE: You need to manually delete the persistent volume - kubectl delete pvc --namespace yb-demo -l app=yb-master - kubectl delete pvc --namespace yb-demo -l app=yb-tserver -``` - -Check the pods, as follows: - -```sh -kubectl get pods --namespace yb-demo -``` - -Expect output similar to the following: - -```output -NAME READY STATUS RESTARTS AGE -yb-master-0 2/2 Running 0 4m -yb-master-1 2/2 Running 0 4m -yb-master-2 2/2 Running 0 4m -yb-tserver-0 2/2 Running 0 4m -yb-tserver-1 2/2 Running 0 4m -yb-tserver-2 2/2 Running 0 4m -``` - -Check the services, as follows: - -```sh -kubectl get services --namespace yb-demo -``` - -Expect output similar to the following: - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -yb-master-ui LoadBalancer 10.109.39.242 35.225.153.213 7000:31920/TCP 10s -yb-masters ClusterIP None 7100/TCP,7000/TCP 10s -yb-tserver-service LoadBalancer 10.98.36.163 35.225.153.214 6379:30929/TCP,9042:30975/TCP,5433:30048/TCP 10s -yb-tservers ClusterIP None 7100/TCP,9000/TCP,6379/TCP,9042/TCP,5433/TCP 10s -``` - -You can also check the history of the `yb-demo` deployment, as follows: - -```sh -helm history yb-demo -n yb-demo -``` - -Expect output similar to the following: - -```output -REVISION UPDATED STATUS CHART APP VERSION DESCRIPTION -1 Thu Apr 13 13:29:13 2020 deployed yugabyte-2.13.0 2.13.0.1-b2 Install complete -``` - -## Connect using YugabyteDB shells - -To connect and use the YSQL Shell (ysqlsh), run the following command: - -```sh -kubectl exec -n yb-demo -it yb-tserver-0 -- ysqlsh -h yb-tserver-0.yb-tservers.yb-demo -``` - -To connect and use the YCQL Shell (ycqlsh), run the following command: - -```sh -kubectl exec -n yb-demo -it yb-tserver-0 -- ycqlsh yb-tserver-0.yb-tservers.yb-demo -``` - -## Connect using external clients - -To connect an external program, get the load balancer `EXTERNAL-IP` address of the `yb-tserver-service` service and connect using port 5433 for YSQL or port 9042 for YCQL, as follows: - -```sh -kubectl get services --namespace yb-demo -``` - -Expect output similar to the following: - -```output -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -... -yb-tserver-service LoadBalancer 10.98.36.163 35.225.153.214 6379:30929/TCP,9042:30975/TCP,5433:30048/TCP 10s -... -``` - -## Configure cluster - -Instead of using the default values in the Helm chart, you can modify the configuration of the YugabyteDB cluster according to your requirements. - -### CPU, memory, and replica count - -The default values for the Helm chart are in the `helm/yugabyte/values.yaml` file. The following is a listing of the most important values. As noted in [Prerequisites](#prerequisites), the defaults are set for a 3-node Kubernetes cluster, each node with 4 CPU cores and 6 GB RAM allocated to YugabyteDB. - -```yaml -storage: - master: - count: 2 - size: 10Gi - storageClass: standard - tserver: - count: 2 - size: 10Gi - storageClass: standard - -resource: - master: - requests: - cpu: 2 - memory: 2Gi - limits: - cpu: 2 - memory: 2Gi - tserver: - requests: - cpu: 2 - memory: 4Gi - limits: - cpu: 2 - memory: 4Gi - -replicas: - master: 3 - tserver: 3 - -partition: - master: 3 - tserver: 3 -``` - -If you want to change the defaults, you can use the following command. 
You can even do `helm install` instead of `helm upgrade` when you are installing on a Kubernetes cluster with configuration different than the defaults: - -```sh -helm upgrade --set resource.tserver.requests.cpu=8,resource.tserver.requests.memory=15Gi yb-demo ./yugabyte -``` - -Replica count can be changed using the following command. Note that only the YB-TServers need to be scaled in a replication factor 3 cluster which keeps the masters count at `3`: - -```sh -helm upgrade --set replicas.tserver=5 yb-demo ./yugabyte -``` - -### Readiness probes - -Readiness probes provide readiness checks for your Kubernetes deployment. Probes are compatible with both direct Helm deployments and [YugabyteDB Anywhere-managed deployments](../../../../../yugabyte-platform/create-deployments/create-universe-multi-zone-kubernetes/#helm-overrides), and work with TLS enabled or restricted authorization environments. Use the probes to ensure pods are ready before being marked as available. The probes verify connectivity using ysqlsh for YSQL and ycqlsh for YCQL. - -The following probes are available: - -- YSQL Readiness. Uses ysqlsh to verify connectivity via local socket for credentialed setups. - -- YCQL Readiness. Uses ycqlsh to validate connectivity. - -- Master Readiness. Uses `httpGet` to probe master. - -- Custom Readiness. Supports custom readiness probe parameters, such as delays, timeouts, and thresholds. - -- Startup probes to delay enforcing of readiness probes. - -Readiness probes are disabled by default for compatibility. - -Enable probes via `values.yaml` or using Kubernetes overrides. - -```yaml -master: - readinessProbe: - enabled: true - -tserver: - readinessProbe: - enabled: true -``` - -The following is example of custom readiness parameters: - -```yaml -tserver: - customReadinessProbe: - initialDelaySeconds: 30 - periodSeconds: 20 - timeoutSeconds: 10 -``` - -### Independent LoadBalancers - -By default, the YugabyteDB Helm chart exposes the client API endpoints and master UI endpoint using two load balancers. If you want to expose the client APIs using independent LoadBalancers, you can execute the following command: - -```sh -helm install yb-demo yugabytedb/yugabyte -f https://raw.githubusercontent.com/yugabyte/charts/master/stable/yugabyte/expose-all.yaml --version {{}} --namespace yb-demo --wait -``` - -You can also bring up an internal load balancer (for either YB-Master or YB-TServer services), if required. To do so, you specify the [annotation](https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer) required for your cloud provider. See [Amazon EKS](../../eks/helm-chart/) and [Google Kubernetes Engine](../../gke/helm-chart/) for examples. - -### Reserved LoadBalancer IP Addresses - -If you intend to use a preallocated (reserved) IP for the exposed YB-Master and YB-TServer services, you need to specify the load balancer IP. If you do not set this IP, a so-called ephemeral, semi-random IP will be allocated. 
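On Google Cloud, for example, you could reserve regional static IP addresses ahead of time and use them as the load balancer IPs. The following is a minimal sketch with hypothetical address names and the `us-west1` region; other cloud providers have equivalent mechanisms for reserving addresses:

```sh
# Reserve two regional static IP addresses (hypothetical names) and list them.
gcloud compute addresses create yb-master-ui-ip --region us-west1
gcloud compute addresses create yb-tserver-ip --region us-west1
gcloud compute addresses list --filter="name~'^yb-'"
```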
- -The following is an example of the `values-overrides.yaml` file that allows you to override IP values in the Helm charts: - -```properties -serviceEndpoints: - - name: "yb-master-ui" - app: "yb-master" - loadBalancerIP: "11.11.11.11" - type: "LoadBalancer" - ports: - http-ui: "7000" - - name: "yb-tserver-service" - app: "yb-tserver" - loadBalancerIP: "22.22.22.22" - type: "LoadBalancer" - ports: - tcp-yql-port: "9042" - tcp-ysql-port: "5433" -``` - -You apply the override by executing the following Helm command: - -```sh -helm install yb-demo ./yugabyte -f values-overrides.yaml -``` - -Assuming that you already reserved the IP addresses (11.11.11.11 and 22.22.22.22), `yb-master-ui` and `yb-tserver-service` will use the predetermined addresses. - -Note that setting the load balancer IP can result in behavior that might not be entirely consistent across cloud providers. - -### Storage class - -If you want to use a storage class other than the standard class for your deployment, provision the storage class and then pass in the name of the class while running the helm install command, as follows: - -```sh -helm install yugabyte --version {{}} --namespace yb-demo --name yb-demo --set storage.master.storageClass=,storage.tserver.storageClass= --wait -``` - -### Configure YB-Master and YB-TServer pods - -Flags on the YB-Master and YB-TServer pods can be specified via the command line or by overriding the `values.yaml` file in the charts repository. The following example shows how to set the three geo-distribution-related flags `placement_cloud`, `placement_region`, and `placement_zone` on a Minikube cluster: - -```sh -helm install yb-demo yugabytedb/yugabyte \ ---version {{}} \ ---set resource.master.requests.cpu=0.5,resource.master.requests.memory=0.5Gi,\ -resource.tserver.requests.cpu=0.5,resource.tserver.requests.memory=0.5Gi,\ -gflags.master.placement_cloud=myk8s-cloud,gflags.master.placement_region=myk8s-region,gflags.master.placement_zone=myk8s-zone,\ -gflags.tserver.placement_cloud=myk8s-cloud,gflags.tserver.placement_region=myk8s-region,gflags.tserver.placement_zone=myk8s-zone\ - --namespace yb-demo -``` - -## Upgrade the software version of YugabyteDB - -You can upgrade the software on the YugabyteDB cluster with the following command. By default, this performs a [rolling update](https://github.com/yugabyte/charts/blob/853d7ac744cf6d637b5877f4681940825beda8f6/stable/yugabyte/values.yaml#L60) of the pods. - -```sh -helm repo update -helm upgrade yb-demo yugabytedb/yugabyte --version {{}} --wait -n yb-demo -``` - -Then finalize the upgrade as follows: - -```sh -kubectl exec -it yb-master-0 -- /home/yugabyte/bin/yb-admin --master_addresses yb-master-0.yb-masters.default.svc.cluster.local:7100 finalize_upgrade -``` - -## Update the configuration of YugabyteDB pods - -You can update most settings in the helm chart by running a `helm upgrade` with the new values. By default, this performs a [rolling update](https://github.com/yugabyte/charts/blob/853d7ac744cf6d637b5877f4681940825beda8f6/stable/yugabyte/values.yaml#L60) of the pods. 
- -```sh -helm upgrade yb-demo yugabytedb/yugabyte --set resource.tserver.requests.cpu=4 --wait -n yb-demo -``` - -## Delete cluster - -To delete the cluster, you need to purge the Helm chart, and then delete the PVCs, as follows: - -```sh -helm uninstall yb-demo -n yb-demo -``` - -```sh -kubectl delete pvc --namespace yb-demo --all -``` diff --git a/docs/content/preview/deploy/manual-deployment/_index.md b/docs/content/preview/deploy/manual-deployment/_index.md deleted file mode 100644 index 3ee0fa466024..000000000000 --- a/docs/content/preview/deploy/manual-deployment/_index.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Manual deployment of YugabyteDB clusters -headerTitle: Single data center deployments -linkTitle: Single-DC deployments -description: Deploy YugabyteDB manually in a single region or private data center using basic administration commands. -headcontent: Deploy YugabyteDB in a single region or private data center -menu: - preview: - identifier: deploy-manual-deployment - parent: deploy - weight: 20 -type: indexpage ---- - -This section describes generic deployment of a YugabyteDB cluster in a single region or data center with a multi-zone/multi-rack configuration. Note that single zone configuration is a special case of multi-zone where all placement related flags are set to the same value across every node. - -For AWS deployments specifically, a [step-by-step guide](../public-clouds/aws/manual-deployment/) to deploying a YugabyteDB cluster is also available. These steps can be adapted for on-premises deployments or deployments in other clouds. - -{{}} - - {{}} - - {{}} - - {{}} - - {{}} - -{{}} diff --git a/docs/content/preview/deploy/multi-dc/_index.md b/docs/content/preview/deploy/multi-dc/_index.md deleted file mode 100644 index d7ba348568bc..000000000000 --- a/docs/content/preview/deploy/multi-dc/_index.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Multi-DC deployments -headerTitle: Multi-DC deployment -linkTitle: Multi-DC deployments -description: Deploy YugabyteDB across multiple data centers or cloud regions -headcontent: Deploy YugabyteDB across multiple data centers (DC) -menu: - preview: - identifier: multi-dc - parent: deploy - weight: 30 -type: indexpage ---- -YugabyteDB is a geo-distributed SQL database that can be deployed across multiple data centers (DCs) or cloud regions. There are two primary configurations for such multi-DC deployments. - -The first configuration uses a single universe stretched across 3 or more data centers with data getting automatically sharded across all data centers. This configuration is default for [Spanner-inspired databases](../../architecture/docdb/) like YugabyteDB. Data replication across data centers is synchronous and is based on the Raft consensus protocol. This means writes are globally consistent and reads are either globally consistent or timeline consistent (when application clients use follower reads). Additionally, resilience against data center failures is fully automatic. This configuration has the potential to incur Wide Area Network (WAN) latency in the write path if the data centers are geographically located far apart from each other and are connected through the shared/unreliable Internet. 
- -For users not requiring global consistency and automatic resilience to data center failures, the WAN latency can be eliminated altogether through the second configuration where two independent, single-DC universes are connected through xCluster replication based on [Change Data Capture](../../architecture/docdb-replication/change-data-capture/). - -[9 Techniques to Build Cloud-Native, Geo-Distributed SQL Apps with Low Latency](https://www.yugabyte.com/blog/9-techniques-to-build-cloud-native-geo-distributed-sql-apps-with-low-latency/) highlights the various multi-DC deployment strategies for a distributed SQL database like YugabyteDB. Note that YugabyteDB is the only Spanner-inspired distributed SQL database to support a 2DC deployment. - - diff --git a/docs/content/preview/deploy/multi-dc/async-replication/_index.md b/docs/content/preview/deploy/multi-dc/async-replication/_index.md deleted file mode 100644 index 8690461dd046..000000000000 --- a/docs/content/preview/deploy/multi-dc/async-replication/_index.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: xCluster deployments -headerTitle: xCluster deployment -linkTitle: xCluster -description: Deploy unidirectional (master-follower) or bidirectional (multi-master) asynchronous replication between two universes -headContent: Unidirectional (master-follower) and bidirectional (multi-master) replication -menu: - preview: - identifier: async-replication - parent: multi-dc - weight: 610 -type: indexpage ---- -By default, YugabyteDB provides synchronous replication and strong consistency across geo-distributed data centers. However, many use cases do not require synchronous replication or justify the additional complexity and operating costs associated with managing three or more data centers. A cross-universe (xCluster) deployment provides asynchronous replication across two data centers or cloud regions. Using an xCluster deployment, you can use unidirectional (master-follower) or bidirectional (multi-master) asynchronous replication between two universes (aka data centers). - -For information on xCluster deployment architecture, replication scenarios, and limitations, refer to [xCluster architecture](../../../architecture/docdb-replication/async-replication/). - -{{}} - - {{}} - - {{}} - -{{}} - -## Prerequisites - -- If the root certificates for the source and target universe are different, (for example, the node certificates for target and source nodes were not created on the same machine), copy the `ca.crt` for the source universe to all target nodes, and vice-versa. If the root certificate for both source and target universes is the same, you can skip this step. - - Locate the `ca.crt` file for the source universe on any source universe node at `/certs/ca.crt`. Copy this file to all target nodes at `/certs/xcluster//` (create the directory if it is not there). The `` must be the same as the replication ID you are using for your xCluster configuration. - - Similarly, copy the `ca.crt` file for the target universe from any target universe node at `/certs/ca.crt` to the source universe nodes at `/certs/xcluster//` (create the directory if it is not there). - -- Global objects like users, roles, tablespaces are not managed by xCluster. You must explicitly create and manage these objects on both source and target universes. - -- For moving data out of YugabyteDB, set up CDC on the xCluster source universe. CDC on the xCluster target universe is not supported. CDC is not supported in bi-directional xCluster setups. 
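For the certificate copy described in the first prerequisite, the following is a minimal sketch. The host name, the `repl1` replication ID, and the `/home/yugabyte/yugabyte-tls-config` directory are all hypothetical placeholders; substitute the actual certificate directory and replication ID used by your universes:

```sh
# On a source universe node: create the xCluster certificate directory on a
# target node and copy the source universe's root certificate to it
# (host name, replication ID, and paths are hypothetical).
ssh yugabyte@target-node 'mkdir -p /home/yugabyte/yugabyte-tls-config/xcluster/repl1'
scp /home/yugabyte/yugabyte-tls-config/ca.crt \
    yugabyte@target-node:/home/yugabyte/yugabyte-tls-config/xcluster/repl1/ca.crt
```

Repeat in the opposite direction to copy the target universe's `ca.crt` to every source universe node.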
- -## Best practices - -- Set the YB-TServer [cdc_wal_retention_time_secs](../../../reference/configuration/all-flags-yb-tserver/#cdc-wal-retention-time-secs) flag to 86400 on both source and target universe. - - This flag determines the duration for which WAL is retained on the source universe in case of a network partition or a complete outage of the target universe. The value depends on how long a network partition of the source universe or an outage of the target universe can be tolerated. - -- Make sure all YB-Master and YB-TServer flags are set to the same value on both the source and target universes. - -- Monitor CPU usage and ensure it remains below 65%. Note that xCluster replication typically incurs a 20% CPU overhead. - -- Monitor disk space usage and ensure it remains below 65%. Allocate sufficient disk space to accommodate WALs generated based on the `cdc_wal_retention_time_secs` setting, which is higher than the default [log_min_seconds_to_retain](../../../reference/configuration/yb-tserver/#log-min-seconds-to-retain) value. diff --git a/docs/content/preview/deploy/public-clouds/_index.md b/docs/content/preview/deploy/public-clouds/_index.md deleted file mode 100644 index 4d8825c2e125..000000000000 --- a/docs/content/preview/deploy/public-clouds/_index.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -title: Deploy YugabyteDB clusters in public clouds -headerTitle: Public clouds -linkTitle: Public clouds -description: Deploy YugabyteDB clusters in public clouds, including Amazon Web Services (AWS), Google Cloud Platform (GCP), and Microsoft Azure. -headcontent: Deploy YugabyteDB in public clouds -aliases: - - /deploy/public-clouds/ -menu: - preview: - identifier: public-clouds - parent: deploy - weight: 40 -type: indexpage ---- - diff --git a/docs/content/preview/deploy/public-clouds/aws/terraform.md b/docs/content/preview/deploy/public-clouds/aws/terraform.md deleted file mode 100644 index c5afd89ce359..000000000000 --- a/docs/content/preview/deploy/public-clouds/aws/terraform.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: Deploy on Amazon Web Services using Terraform -headerTitle: Amazon Web Services -linkTitle: Amazon Web Services -description: Deploy YugabyteDB clusters on Amazon Web Services using Terraform. -menu: - preview: - identifier: deploy-in-aws-2-terraform - parent: public-clouds - weight: 630 -type: docs ---- - - - -## Prerequisites - -Download and install [Terraform](https://www.terraform.io/downloads.html). - -Verify the installation using the `terraform` command. - -```sh -$ terraform -``` - -You should see output similar to the following. - -```output -Usage: terraform [--version] [--help] [args] -... -Common commands: - apply Builds or changes infrastructure - console Interactive console for Terraform interpolations - destroy Destroy Terraform-managed infrastructure - env Workspace management - fmt Rewrites config files to canonical format -``` - -## Create a Terraform configuration file - -Create a Terraform configuration file called `yugabyte-db-config.tf` and add the following details to it. The Terraform module can be found in the [terraform-aws-yugabyte GitHub repository](https://github.com/yugabyte/terraform-aws-yugabyte). - -```terraform -provider "aws" { - # Configure your AWS account credentials here. - access_key = "ACCESS_KEY_HERE" - secret_key = "SECRET_KEY_HERE" - region = "us-west-2" -} - -module "yugabyte-db-cluster" { - # The source module used for creating AWS clusters. 
- source = "github.com/Yugabyte/terraform-aws-yugabyte" - - # The name of the cluster to be created, change as per need. - cluster_name = "test-cluster" - - # Existing custom security group to be passed so that you can connect to the instances. - # Make sure this security group allows your local machine to SSH into these instances. - custom_security_group_id="SECURITY_GROUP_HERE" - - # AWS key pair that you want to use to ssh into the instances. - # Make sure this key pair is already present in the noted region of your account. - ssh_keypair = "SSH_KEYPAIR_HERE" - ssh_private_key = "SSH_PRIVATE_KEY_PATH_HERE" - - # Existing vpc and subnet ids where the instances should be spawned. - vpc_id = "VPC_ID_HERE" - subnet_ids = ["SUBNET_ID_HERE"] - - # Replication factor of the YugabyteDB cluster. - replication_factor = "3" - - # The number of nodes in the cluster, this cannot be lower than the replication factor. - num_instances = "3" - - # The AWS region for the cluster to be created - region_name = "REGION_NAME_HERE" - - # The availability zones for the instances, the length - # of AZs must match num_instances and can contain duplicates. - availability_zones = ["AZ1_HERE", "AZ2_HERE", "AZ3_HERE"] -} -``` - -If you do not have a custom security group, you would need to remove the `${var.custom_security_group_id}` variable in `main.tf`, so that the `aws_instance` looks as follows: - -```terraform -resource "aws_instance" "yugabyte_nodes" { - count = "${var.num_instances}" - ... - vpc_security_group_ids = [ - "${aws_security_group.yugabyte.id}", - "${aws_security_group.yugabyte_intra.id}", - "${var.custom_security_group_id}" - ] -``` - -## Create a cluster - -Init terraform first if you have not already done so. - -```sh -$ terraform init -``` - -Run the following to create the instances and bring up the cluster: - -```sh -$ terraform apply -``` - -After the cluster is created, you can go to the URL `http://:7000` to view the UI. You can find the node's IP or DNS by running the following: - -```sh -$ terraform state show aws_instance.yugabyte_nodes[0] -``` - -You can check the state of the nodes at any point by running the following command: - -```sh -$ terraform show -``` - -## Verify resources created - -The following resources are created by this module: - -- `module.yugabyte-db-cluster.aws_instance.yugabyte_nodes` - - The AWS instances. - - For a cluster named `test-cluster`, the instances are named `yb-ce-test-cluster-n1`, `yb-ce-test-cluster-n2`, `yb-ce-test-cluster-n3`. - -- `module.yugabyte-db-cluster.aws_security_group.yugabyte` - - The security group that allows the various clients to access the YugabyteDB cluster. - - For a cluster named `test-cluster`, this security group is named `yb-ce-test-cluster`, with the ports 7000, 9000, 9042, and 6379 open to all other instances in the same security group. - -- `module.yugabyte-db-cluster.aws_security_group.yugabyte_intra` - - The security group that allows communication internal to the cluster. - - For a cluster named `test-cluster`, this security group is named `yb-ce-test-cluster-intra` with the ports 7100, 9100 open to all other instances in the same security group. - -- `module.yugabyte-db-cluster.null_resource.create_yugabyte_universe` A local script that configures the newly created instances to form a new YugabyteDB universe. 
- -## [Optional] Destroy the cluster - -To destroy what you just created, you can run the following command: - -```sh -$ terraform destroy -``` diff --git a/docs/content/preview/deploy/public-clouds/azure/terraform.md b/docs/content/preview/deploy/public-clouds/azure/terraform.md deleted file mode 100644 index ea13e88599c9..000000000000 --- a/docs/content/preview/deploy/public-clouds/azure/terraform.md +++ /dev/null @@ -1,174 +0,0 @@ ---- -title: Deploy on Microsoft Azure using Terraform -headerTitle: Microsoft Azure -linkTitle: Microsoft Azure -description: Use Terraform to deploy YugabyteDB on Microsoft Azure. -menu: - preview: - identifier: deploy-in-azure-3-terraform - parent: public-clouds - weight: 650 -type: docs ---- - - - -## Prerequisites - -Download and install [Terraform](https://www.terraform.io/downloads.html). - -Verify the installation using the `terraform` command. - -```sh -$ terraform -``` - -You should see output similar to the following. - -```output -Usage: terraform [--version] [--help] [args] -... -Common commands: - apply Builds or changes infrastructure - console Interactive console for Terraform interpolations - destroy Destroy Terraform-managed infrastructure - env Workspace management - fmt Rewrites config files to canonical format -``` - -## Set the Azure credentials - -Export the required credentials in your current shell using the following commands: - -```sh -echo "Setting environment variables for Terraform" -export ARM_SUBSCRIPTION_ID="your_subscription_id" -export ARM_CLIENT_ID="your_appId" -export ARM_CLIENT_SECRET="your_password" -export ARM_TENANT_ID="your_tenant_id" -``` - - -For instructions on installing Terraform and configuring it for Azure, see [Quickstart: Configure Terraform in Azure Cloud Shell with Bash](https://docs.microsoft.com/en-gb/azure/virtual-machines/linux/terraform-install-configure). - -## Create a Terraform configuration file - -Create a Terraform configuration file named `yugabyte-db-config.tf` and add the following details to it. The Terraform module can be found in the [terraform-azure-yugabyte](https://github.com/yugabyte/terraform-azure-yugabyte) GitHub repository. - -```terraform -module "yugabyte-db-cluster" -{ - # The source module used for creating clusters on Azure. - source = "github.com/Yugabyte/terraform-azure-yugabyte" - - # The name of the cluster to be created, change as per need. - cluster_name = "test-cluster" - - # key pair. - ssh_private_key = "PATH_TO_SSH_PRIVATE_KEY_FILE" - ssh_public_key = "PATH_TO_SSH_PUBLIC_KEY_FILE" - ssh_user = "SSH_USER_NAME" - - # The region name where the nodes should be spawned. - region_name = "YOUR VPC REGION" - - # The name of resource group in which all Azure resource will be created. - resource_group = "test-yugabyte" - - # Replication factor. - replication_factor = "3" - - # The number of nodes in the cluster, this cannot be lower than the replication factor. - node_count = "3" -} - -output "outputs" -{ - value = module.yugabyte-db-cluster -} -``` - -## Create a cluster - -Initialize terraform first, if you have not already done so. - -```sh -$ terraform init -``` - -Now, run the following to create the instances and bring up the cluster. - -```sh -$ terraform apply -``` - -After the cluster is created, you can go to the URL `http://:7000` to view the UI. 
You can find the node's public IP address by running the following: - -```sh -$ terraform state show module.yugabyte-db-cluster.azurerm_public_ip.YugaByte_Public_IP[0] -``` - -You can access the cluster UI by going to public IP address of any of the instances at port `7000`. The IP address can be viewed by replacing `0` in the preceding command with the desired index. - -You can check the state of the nodes at any point by running the following command: - -```sh -$ terraform show -``` - -## Verify resources created - -The following resources are created by this module: - -- `module.azure-yugabyte.azurerm_virtual_machine.Yugabyte-Node` - - The Azure VM instances. - - For a cluster named `test-cluster`, the instances are named `yugabyte-test-cluster-node-1`, `yugabyte-test-cluster-node-2`, and `yugabyte-test-cluster-node-3`. - -- `module.azure-yugabyte.azurerm_network_security_group.Yugabyte-SG` - - The security group that allows the various clients to access the YugabyteDB cluster. - - For a cluster named `test-cluster`, this security group is named `yugabyte-test-cluster-SG`, with the ports 7000, 9000, 9042, 7100, 9200, and 6379 open to all other instances in the same security group. - -- `module.azure-yugabyte.null_resource.create_yugabyte_universe` - - A local script that configures the newly created instances to form a new YugabyteDB universe. - -- `module.azure-yugabyte.azurerm_network_interface.Yugabyte-NIC` - - The Azure network interface for VM instance. - - For a cluster named `test-cluster`, the network interface is named `yugabyte-test-cluster-NIC-1`, `yugabyte-test-cluster-NIC-2`, and `yugabyte-test-cluster-NIC-3`. - -## Destroy the cluster [optional] - -To destroy what you just created, you can run the following command: - -```sh -$ terraform destroy -``` diff --git a/docs/content/preview/deploy/public-clouds/gcp/terraform.md b/docs/content/preview/deploy/public-clouds/gcp/terraform.md deleted file mode 100644 index 7a5c39dc7ece..000000000000 --- a/docs/content/preview/deploy/public-clouds/gcp/terraform.md +++ /dev/null @@ -1,171 +0,0 @@ ---- -title: Deploy YugabyteDB in Google Cloud Platform with Terraform -headerTitle: Google Cloud Platform -linkTitle: Google Cloud Platform -description: Use Terraform to deploy a YugabyteDB cluster in Google Cloud Platform. -aliases: - - /preview/deploy/public-clouds/gcp/ -menu: - preview: - identifier: deploy-in-gcp-3-terraform - parent: public-clouds - weight: 640 -type: docs ---- - - - -## Prerequisites - -Download and install [Terraform](https://www.terraform.io/downloads.html). - -Verify the installation using the `terraform` command. - -```sh -$ terraform -``` - -You should see output similar to the following. - -```output -Usage: terraform [--version] [--help] [args] -... -Common commands: - apply Builds or changes infrastructure - console Interactive console for Terraform interpolations - destroy Destroy Terraform-managed infrastructure - env Workspace management - fmt Rewrites config files to canonical format -``` - -## Create a terraform configuration file - -1. Create a terraform file with provider details. - - ```terraform - provider "google" - { - # Provide your Creadentilals - credentials = "${file("yugabyte-pcf-bc8114281026.json")}" - - # The name of your GCP project - project = "" - } - ``` - - To obtain your credentials file, refer to [Getting started with authentication](https://cloud.google.com/docs/authentication/getting-started) in the GCP documentation. - -1. Add the Yugabyte Terraform module to your file. 
- - ```terraform - module "yugabyte-db-cluster" - { - source = "github.com/Yugabyte/terraform-gcp-yugabyte.git" - - # The name of the cluster to be created. - cluster_name = "test-cluster" - - # key pair. - ssh_private_key = "SSH_PRIVATE_KEY_HERE" - ssh_public_key = "SSH_PUBLIC_KEY_HERE" - ssh_user = "SSH_USER_NAME_HERE" - - # The region name where the nodes should be spawned. - region_name = "YOUR_VPC_REGION" - - # Replication factor. - replication_factor = "3" - - # The number of nodes in the cluster, this cannot be lower than the replication factor. - node_count = "3" - } - ``` - -## Create a cluster - -Init terraform first if you haven't already done so. - -```sh -$ terraform init -``` - -To check what changes are going to happen in your environment, run the following: - -```sh -$ terraform plan -``` - -Run the following to create the instances and bring up the cluster: - -```sh -$ terraform apply -``` - -After the cluster is created, go to the URL `http://:7000` to view the UI. You can find the node's IP or DNS by running the following: - -```sh -$ terraform state show google_compute_instance.yugabyte_node[0] -``` - -You can check the state of the nodes at any point by running the following command: - -```sh -$ terraform show -``` - -## Verify resources created - -The following resources are created by this module: - -- `module.terraform-gcp-yugabyte.google_compute_instance.yugabyte_node` - - The GCP VM instances. - - For a cluster named `test-cluster`, the instances are named `yugabyte-test-cluster-n1`, `yugabyte-test-cluster-n2`, and `yugabyte-test-cluster-n3`. - -- `module.terraform-gcp-yugabyte.google_compute_firewall.Yugabyte-Firewall` - - The firewall rule that allows various clients to access the YugabyteDB cluster. - - For a cluster named `test-cluster`, this firewall rule is named `default-yugabyte-test-cluster-firewall` with the ports 7000, 9000, 9042, and 6379 open to all. - -- `module.terraform-gcp-yugabyte.google_compute_firewall.Yugabyte-Intra-Firewall` - - The firewall rule that allows communication internal to the cluster. - - For a cluster named `test-cluster`, this firewall rule is named `default-yugabyte-test-cluster-intra-firewall` with the ports 7100 and 9100 open to all other VM instances in the same network. - -- `module.terraform-gcp-yugabyte.null_resource.create_yugabyte_universe` - - A local script that configures the newly created instances to form a new YugabyteDB universe. - -## Destroy the cluster (optional) - -To destroy what you just created, run the following command: - -```sh -$ terraform destroy -``` diff --git a/docs/content/preview/develop/_index.md b/docs/content/preview/develop/_index.md deleted file mode 100644 index f8c4e59e636a..000000000000 --- a/docs/content/preview/develop/_index.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: Develop applications -headerTitle: Develop -linkTitle: Develop -description: Build YugabyteDB application that use ecosystem integrations and GraphQL. -headcontent: Get started building applications based on YugabyteDB -type: indexpage -cascade: - unversioned: true ---- - -## Application development - -Although building scalable applications on top of YugabyteDB is straightforward, you need to understand certain fundamental concepts like transactions, search, and more to make the best use of them. - -{{}} -To learn how to build applications on top of YugabyteDB, see [Learn app development](./learn/). -{{}} - -## Drivers and ORMs - -To communicate with YugabyteDB, applications need to use drivers. 
Applications can also be built using Object-Relational mappings, a technique used to communicate with the database using object-oriented techniques. We've tested various drivers and ORMs in multiple languages with the optimal configurations to get your applications up and running. - -{{}} -For the list of drivers and ORMs with sample code, see [Drivers and ORMs](../drivers-orms/). -{{}} - -## Data modeling - -Although YugabyteDB is fully SQL compatible, modeling data for a distributed database is quite different from modeling for a monolithic database like MySQL or PostgreSQL. This is because the table data is distributed across different nodes. You must understand how to model your data for efficient storage and retrieval from a distributed system. - -{{}} -To understand how to model your data for YugabyteDB, see [Distributed data modeling](./data-modeling/). -{{}} - -## Global applications - -Today's applications have to cater to users distributed across the globe. Running applications across multiple data centers while providing the best user experience is no trivial task. Yugabyte provides some battle-tested design patterns for your global applications. - -{{}} -To learn more about building global applications, see [Build global applications](./build-global-apps/). -{{}} - -## Multi-cloud applications - -A multi-cloud strategy provides the flexibility to use the optimal computing environment for each specific workload, helps avoid vendor lock-in, lets you place data close to the users, and can minimize cost by choosing optimal pricing and performance of various cloud providers. You can also opt for a hybrid model as your path to migration onto the cloud. - -{{}} -To understand how to build a multi-cloud setup with YugabyteDB, see [Build multi-cloud applications](./multi-cloud/). -{{}} - -## Best practices - -Use these best practices to build distributed applications on top of YugabyteDB; this includes a list of techniques that you can adopt to make your application perform its best. - -{{}} -For more details, see [Best practices](./best-practices-develop). -{{}} - -## Quality of service - -Although YugabyteDB can scale horizontally when needed, it also includes safety measures and settings such as rate-limiting, admission control, transaction priorities, and more, to ensure applications can maintain a high quality of service for all users when the systems comes under heavy load. - -{{}} -To learn more about how to use rate-limiting and other features, see [Quality of service](./quality-of-service/). -{{}} - -## Cloud-native development - -Cloud-native development refers to building and running applications that fully exploit the advantages of cloud computing without needing to install any software on your development machine. Two prominent tools for cloud-native development environments are Gitpod and GitHub Codespaces. Both provide cloud-based development environments, but they have their own features and use cases. - -{{}} -To learn more about how to use browser-based IDEs, see [Cloud-native development](./gitdev/). -{{}} - -## Tutorials - -Yugabyte provides multiple step-by-step guides for building scalable and fault-tolerant applications with YugabyteDB using your favorite programming language, services, and frameworks, including Kafka, Gen-AI, and more. - -{{}} -For step-by-step guides for various frameworks, see [Tutorials](/preview/tutorials/). 
-{{}} diff --git a/docs/content/preview/develop/best-practices-develop/_index.md b/docs/content/preview/develop/best-practices-develop/_index.md deleted file mode 100644 index 38745ca0948b..000000000000 --- a/docs/content/preview/develop/best-practices-develop/_index.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: Best practices for applications -headerTitle: Best practices -linkTitle: Best practices -description: Tips and tricks to build applications -headcontent: Tips and tricks to build applications for high performance and availability -aliases: - - /preview/develop/best-practices-ysql/ -menu: - preview: - identifier: best-practices-develop - parent: develop - weight: 80 -type: indexpage ---- - -## YSQL - -{{}} - - {{}} - - {{}} - -{{}} - -## YCQL - -{{}} - - {{}} - -{{}} diff --git a/docs/content/preview/develop/best-practices-develop/best-practices-ycql.md b/docs/content/preview/develop/best-practices-develop/best-practices-ycql.md deleted file mode 100644 index 00b032af74c3..000000000000 --- a/docs/content/preview/develop/best-practices-develop/best-practices-ycql.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: Best practices for YCQL applications -headerTitle: Best practices for YCQL applications -linkTitle: YCQL applications -description: Tips and tricks to build YCQL applications -headcontent: Tips and tricks to build YCQL applications for high performance and availability -aliases: - - /preview/develop/best-practices-ycql/ -menu: - preview: - identifier: best-practices-ycql - parent: best-practices-develop - weight: 40 -type: docs ---- - -To build high-performance and scalable applications using YCQL, developers should follow key schema design and operational best practices tailored for YugabyteDB's distributed architecture. This guide covers strategies for using indexes efficiently, optimizing read/write paths with batching and prepared statements, managing JSON and collection data types, and ensuring memory settings align with your query layer. These practices help ensure reliable performance, especially under real-world workloads. - -## Global secondary indexes - -Indexes use multi-shard transactional capability of YugabyteDB and are global and strongly consistent (ACID). To add secondary indexes, you need to create tables with [transactions enabled](../../../api/ycql/ddl_create_table/#table). They can also be used as materialized views by using the [INCLUDE clause](../../../api/ycql/ddl_create_index#included-columns). - -## Unique indexes - -YCQL supports [unique indexes](../../../api/ycql/ddl_create_index#unique-index). A unique index disallows duplicate values from being inserted into the indexed columns. - -## Covering indexes - -When querying by a secondary index, the original table is consulted to get the columns that aren't specified in the index. This can result in multiple random reads across the main table. - -Sometimes, a better way is to include the other columns that you're querying that are not part of the index using the [INCLUDE clause](../../../api/ycql/ddl_create_index/#included-columns). When additional columns are included in the index, they can be used to respond to queries directly from the index without querying the table. - -This turns a (possible) random read from the main table to just a filter on the index. - -## Atomic read modify write operations with UPDATE IF EXISTS - -For operations like `UPDATE ... IF EXISTS` and `INSERT ... 
IF NOT EXISTS` that require an atomic read-modify-write, Apache Cassandra uses LWT, which requires 4 round trips between peers. YugabyteDB supports these operations far more efficiently because of its strongly consistent CP (in the CAP theorem) design, and they require only a single Raft round trip between peers. Number and counter types work the same and don't need a separate "counters" table. - -## JSONB - -YugabyteDB supports the [JSONB](../../../api/ycql/type_jsonb/) data type to model JSON data, which does not have a set schema and might change often. You can use JSONB to group less frequently accessed columns of a table. YCQL also supports JSONB expression indexes that can be used to speed up data retrieval that would otherwise require scanning the JSON entries. - -{{< note title="Use JSONB columns only when necessary" >}} - -JSONB columns are slower to read and write compared to normal columns. They also take more space because they need to store keys as strings, and they make it harder to keep data consistent. A good schema design is to keep most columns as regular columns or collections, and use JSONB only for truly dynamic values. Don't create a `data jsonb` column where you store everything; instead, use a `dynamic_data jsonb` column with the others being primitive columns. - -{{< /note >}} - -## Increment and decrement numeric types - -In YugabyteDB, YCQL extends Apache Cassandra to add increment and decrement operators for integer data types. [Integers](../../../api/ycql/type_int) can be set, inserted, incremented, and decremented, while `COUNTER` can only be incremented or decremented. YugabyteDB implements CAS (compare-and-set) operations in one round trip, compared to four for Apache Cassandra. - -## Expire older records automatically with TTL - -YCQL supports automatic expiration of data using the [TTL feature](../../../api/ycql/ddl_create_table/#use-table-property-to-define-the-default-expiration-time-for-rows). You can set a retention policy for data at the table, row, or column level, and older data is automatically purged from the database. - -If configuring TTL for a time series dataset or any dataset with a table-level TTL, it is recommended for CPU and space efficiency to expire older files directly by using TTL-specific configuration options. More details can be found in [Efficient data expiration for TTL](../../learn/ttl-data-expiration-ycql/#efficient-data-expiration-for-ttl). - -{{}} -TTL does not apply to transactional tables, so it is unsupported in that context. -{{}} - -## Use YugabyteDB drivers - -Use YugabyteDB-specific [client drivers](../../../drivers-orms/) because they are cluster- and partition-aware, and support `jsonb` columns. - -## Leverage connection pooling in the YCQL client - -A single client (for example, a multi-threaded application) should ideally use a single cluster object. Under the covers, the cluster object holds a configurable number of connections to YB-TServers. Typically, 1 or 2 connections per YB-TServer suffice to serve even 64-128 application threads. The same connection can be used for multiple outstanding requests, also known as multiplexing. - -See also [Connection pooling](https://docs.datastax.com/en/developer/java-driver/4.6/manual/core/pooling/) in the DataStax Java Driver documentation.
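A rough YCQL sketch of the TTL and increment/decrement behavior described above. The `user_actions` and `page_views` tables, column names, and TTL values are hypothetical examples, not part of the original page.

```sql
-- Hypothetical table whose rows expire after one day by default.
CREATE TABLE user_actions (
  user_id INT,
  action_ts TIMESTAMP,
  action TEXT,
  PRIMARY KEY ((user_id), action_ts)
) WITH default_time_to_live = 86400;

-- Override the table-level TTL for a single row (2 hours).
INSERT INTO user_actions (user_id, action_ts, action)
  VALUES (1, '2024-01-01 10:00:00', 'login') USING TTL 7200;

-- Plain integer columns can be incremented or decremented in place.
CREATE TABLE page_views (page_id TEXT PRIMARY KEY, views BIGINT);
INSERT INTO page_views (page_id, views) VALUES ('home', 0);
UPDATE page_views SET views = views + 1 WHERE page_id = 'home';
```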
- -## Use prepared statements - -Whenever possible, use prepared statements so that YugabyteDB's partition-aware drivers can route queries to the tablet leader, improving throughput and eliminating the need for the server to parse the query on each operation. - -## Use batching for higher throughput - -Use batching to write a set of operations in a single RPC call instead of using multiple RPC calls, one per operation. Each batch operation has higher latency compared to single-row operations but has higher throughput overall. - -## Column and row sizes - -For consistent latency/performance, keep columns in the 2 MB range or less. - -Big columns add up when selecting multiple columns or full rows. For consistent latency and performance, keep the size of individual rows in the 32 MB range or less. - -## Don't use big collections - -Collections are designed for storing small sets of values that are not expected to grow to arbitrary size (such as phone numbers or addresses for a user rather than posts or messages). While collections of larger sizes are allowed, they may have a significant impact on performance for queries involving them. In particular, some list operations (insert at an index and remove elements) require a read-before-write. - -## Collections with many elements - -Each element inside a collection ends up as a [separate key value](../../../architecture/docdb/data-model#examples) in DocDB, adding per-element overhead. - -If your collections are immutable, or you update the whole collection in full, consider using the JSONB data type. An alternative is to use ProtoBuf or FlatBuffers and store the serialized data in a BLOB column. - -## Use partition_hash for large table scans - -The `partition_hash` function can be used for querying a subset of the data to get approximate row counts or to break down full-table operations into smaller sub-tasks that can be run in parallel. See [example usage](../../../api/ycql/expr_fcall#partition-hash-function) along with a working Python script. - -## TRUNCATE tables instead of DELETE - -[TRUNCATE](../../../api/ycql/dml_truncate/) deletes the database files that store the table and is much faster than [DELETE](../../../api/ycql/dml_delete/), which inserts a _delete marker_ for each row in transactions; the markers are removed from storage only when a compaction runs. - -## Memory and tablet limits - -If you are not using YSQL, ensure the [use_memory_defaults_optimized_for_ysql](../../../reference/configuration/yb-master/#use-memory-defaults-optimized-for-ysql) flag is set to false. This flag optimizes YugabyteDB's memory setup for YSQL, reserving a considerable amount of memory for PostgreSQL; if you are not using YSQL, that memory is wasted when it could instead improve performance by allowing more data to be cached. - -Note that although the default setting is false, when creating a new universe using yugabyted or YugabyteDB Anywhere, the flag is set to true, unless you explicitly set it to false. - -See [Memory division flags](../../../reference/configuration/yb-tserver/#memory-division-flags) for more information.
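To make the `partition_hash` guidance above concrete, the following is a hedged sketch; the `sensor_data` table and the sub-range size are hypothetical, and in practice each sub-range query would be issued from a separate worker so the scans run in parallel.

```sql
-- Hypothetical time series table, hash-partitioned on device_id.
CREATE TABLE sensor_data (
  device_id TEXT,
  ts TIMESTAMP,
  reading DOUBLE,
  PRIMARY KEY ((device_id), ts)
);

-- partition_hash() returns a value in [0, 65535]. Split that range into
-- sub-ranges (for example, 16 ranges of 4096) and run one query per range.
SELECT COUNT(*) FROM sensor_data
  WHERE partition_hash(device_id) >= 0 AND partition_hash(device_id) < 4096;

SELECT COUNT(*) FROM sensor_data
  WHERE partition_hash(device_id) >= 4096 AND partition_hash(device_id) < 8192;

-- ...and so on up to 65535; summing the partial counts gives the overall row count.
```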
diff --git a/docs/content/preview/develop/best-practices-develop/clients.md deleted file mode 100644 index 95a66bf63256..000000000000 --- a/docs/content/preview/develop/best-practices-develop/clients.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -title: Best practices for YSQL clients -headerTitle: Best practices for YSQL clients -linkTitle: YSQL clients -description: Tips and tricks for administering YSQL clients -headcontent: Tips and tricks for administering YSQL clients -menu: - preview: - identifier: best-practices-ysql-clients - parent: best-practices-develop - weight: 20 -type: docs ---- - -Client-side configuration plays a critical role in the performance, scalability, and resilience of YSQL applications. This guide highlights essential best practices for managing connections, balancing load across nodes, and handling failovers efficiently using YugabyteDB's smart drivers and connection pooling. Whether you're deploying in a single region or across multiple data centers, these tips will help ensure your applications make the most of YugabyteDB's distributed architecture. - -## Load balance and failover using smart drivers - -YugabyteDB [smart drivers](../../../drivers-orms/smart-drivers/) provide advanced cluster-aware load-balancing capabilities that enable your applications to send requests to multiple nodes in the cluster by connecting to one node. You can also set a fallback hierarchy by assigning priority to specific regions, so that connections are made to the region with the highest priority and fall back to the region with the next priority if the high-priority region fails. - -{{}} -For more information, see [Load balancing with smart drivers](https://www.yugabyte.com/blog/multi-region-database-deployment-best-practices/#load-balancing-with-smart-driver). -{{}} - -## Make sure the application uses new nodes - -When a cluster is expanded, newly added nodes do not automatically start to receive client traffic. Regardless of the language of the driver or whether you are using a smart driver, the application must either explicitly request new connections or, if it is using a pooling solution, configure the pooler to recycle connections periodically (for example, by setting maxLifetime and/or idleTimeout). - -## Scale your application with connection pools - -Set up different pools with different load balancing policies as needed for your application to scale by using popular pooling solutions such as HikariCP and Tomcat along with YugabyteDB [smart drivers](../../../drivers-orms/smart-drivers/). - -{{}} -For more information, see [Connection pooling](../../../drivers-orms/smart-drivers/#connection-pooling). -{{}} - -### Database migrations and connection pools - -In some cases, connection pools may trigger unexpected errors while running a sequence of database migrations or other DDL operations. - -Because YugabyteDB is distributed, it can take a while for the result of a DDL to fully propagate to all caches on all nodes in a cluster. As a result, after a DDL statement completes, the next DDL statement that runs right afterwards on a different PostgreSQL connection may, in rare cases, see errors such as `duplicate key value violates unique constraint "pg_attribute_relid_attnum_index"` (see issue {{}}). It is recommended to use a single connection while running a sequence of DDL operations, as is common with application migration scripts with tools such as Flyway or Active Record.
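As a rough illustration of the pooling and smart-driver guidance above, the following HikariCP-style properties assume the YugabyteDB JDBC smart driver; the URL, host, pool size, and timeout values are placeholders to adapt to your environment, not settings from the original page.

```sh
# Hypothetical HikariCP settings for a YugabyteDB smart-driver data source.
# load-balance spreads connections across nodes; topology-keys prefers a region.
jdbcUrl=jdbc:yugabytedb://127.0.0.1:5433/yugabyte?load-balance=true&topology-keys=aws.us-east-1.*
maximumPoolSize=20
# Recycle connections periodically so newly added nodes start receiving traffic.
maxLifetime=1800000
idleTimeout=600000
```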
- -## Use YSQL Connection Manager - -YugabyteDB includes a built-in connection pooler, YSQL Connection Manager {{}}, which provides the same connection pooling advantages as other external pooling solutions, but without many of their limitations. As the manager is bundled with the product, it is convenient to manage, monitor, and configure the server connections. - -For more information, refer to the following: - -- [YSQL Connection Manager](../../../additional-features/connection-manager-ysql/) -- [Built-in Connection Manager Turns Key PostgreSQL Weakness into a Strength](https://www.yugabyte.com/blog/connection-pooling-management/) diff --git a/docs/content/preview/develop/best-practices-develop/data-modeling-perf.md deleted file mode 100644 index 20f0878248b5..000000000000 --- a/docs/content/preview/develop/best-practices-develop/data-modeling-perf.md +++ /dev/null @@ -1,257 +0,0 @@ ---- -title: Best practices for Data Modeling and performance of YSQL applications -headerTitle: Best practices for Data Modeling and performance of YSQL applications -linkTitle: YSQL data modeling -description: Tips and tricks for building YSQL applications -headcontent: Tips and tricks for building YSQL applications -menu: - preview: - identifier: data-modeling-perf - parent: best-practices-develop - weight: 10 -type: docs ---- - -Designing efficient, high-performance YSQL applications requires thoughtful data modeling and an understanding of how YugabyteDB handles distributed workloads. This guide offers a collection of best practices, from leveraging colocation and indexing techniques to optimizing transactions and parallelizing queries, that can help you build scalable, globally distributed applications with low latency and high availability. Whether you're developing new applications or tuning existing ones, these tips will help you make the most of YSQL's capabilities. - -## Use application patterns - -Running applications in multiple data centers with data split across them is not a trivial task. When designing global applications, choose a suitable design pattern for your application from a suite of battle-tested design paradigms, including [Global database](../../build-global-apps/global-database), [Multi-master](../../build-global-apps/active-active-multi-master), [Standby cluster](../../build-global-apps/active-active-single-master), [Duplicate indexes](../../build-global-apps/duplicate-indexes), [Follower reads](../../build-global-apps/follower-reads), and more. You can also combine these patterns as per your needs. - -{{}} -For more details, see [Build global applications](../../build-global-apps). -{{}} - -## Colocation - -Colocated tables optimize latency and performance for data access by reducing the need for additional trips across the network for small tables. Additionally, colocation reduces the overhead of creating a tablet for every relation (tables, indexes, and so on) and storing their data on each node. - -{{}} -For more details, see [Colocation](../../../additional-features/colocation/). -{{}} - -## Faster reads with covering indexes - -When a query uses an index to look up rows faster, the columns that are not present in the index are fetched from the original table. This results in additional round trips to the main table, leading to increased latency.
- -Use [covering indexes](../../../explore/ysql-language-features/indexes-constraints/covering-index-ysql/) to store all the required columns needed for your queries in the index. Indexing converts a standard Index-Scan to an [Index-Only-Scan](https://dev.to/yugabyte/boosts-secondary-index-queries-with-index-only-scan-5e7j). - -{{}} -For more details, see [Avoid trips to the table with covering indexes](https://www.yugabyte.com/blog/multi-region-database-deployment-best-practices/#avoid-trips-to-the-table-with-covering-indexes). -{{}} - -## Faster writes with partial indexes - -A partial index is an index that is built on a subset of a table and includes only rows that satisfy the condition specified in the WHERE clause. This speeds up any writes to the table and reduces the size of the index, thereby improving speed for read queries that use the index. - -{{}} -For more details, see [Partial indexes](../../../explore/ysql-language-features/indexes-constraints/partial-index-ysql/). -{{}} - -## Distinct keys with unique indexes - -If you need values in some of the columns to be unique, you can specify your index as UNIQUE. - -When a unique index is applied to two or more columns, the combined values in these columns can't be duplicated in multiple rows. Note that because a NULL value is treated as a distinct value, you can have multiple NULL values in a column with a unique index. - -{{}} -For more details, see [Unique indexes](../../../explore/ysql-language-features/indexes-constraints/unique-index-ysql/). -{{}} - -## Faster sequences with server-level caching - -Sequences in databases automatically generate incrementing numbers, perfect for generating unique values like order numbers, user IDs, check numbers, and so on. They prevent multiple application instances from concurrently generating duplicate values. However, generating sequences on a database that is spread across regions could have a latency impact on your applications. - -Enable [server-level caching](../../../api/ysql/exprs/sequence_functions/func_nextval/#caching-values-on-the-yb-tserver) to improve the speed of sequences, and also avoid discarding many sequence values when an application disconnects. - -{{}} -For a demo, see the YugabyteDB Friday Tech Talk on [Scaling sequences with server-level caching](https://www.youtube.com/watch?v=hs-CU3vjMQY&list=PL8Z3vt4qJTkLTIqB9eTLuqOdpzghX8H40&index=76). -{{}} - -## Fast single-row transactions - -Common scenarios of updating rows and fetching the results in multiple statements can lead to multiple round-trips between the application and server. In many cases, rewriting these statements as single statements using the RETURNING clause will lead to lower latencies as YugabyteDB has optimizations to make single statements faster. For example, the following statements: - -```sql -SELECT v FROM txndemo WHERE k=1 FOR UPDATE; -UPDATE txndemo SET v = v + 3 WHERE k=1; -SELECT v FROM txndemo WHERE k=1; -``` - -can be re-written as follows: - -```sql -UPDATE txndemo SET v = v + 3 WHERE k=1 RETURNING v; -``` - -{{}} -For more details, see [Fast single-row transactions](../../../develop/learn/transactions/transactions-performance-ysql/#fast-single-row-transactions). -{{}} - -## Delete older data quickly with partitioning - -Use [table partitioning](../../../explore/ysql-language-features/advanced-features/partitions/) to split your data into multiple partitions according to date so that you can quickly delete older data by dropping the partition. 
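A minimal sketch of the partition-by-time approach just described; the `order_changes` table and monthly ranges are hypothetical.

```sql
-- Hypothetical table range-partitioned by month.
CREATE TABLE order_changes (
  order_id BIGINT,
  change_date DATE,
  details TEXT
) PARTITION BY RANGE (change_date);

CREATE TABLE order_changes_2024_01 PARTITION OF order_changes
  FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE order_changes_2024_02 PARTITION OF order_changes
  FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

-- Dropping a partition removes an entire month of data without row-by-row deletes.
DROP TABLE order_changes_2024_01;
```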
- -{{}} -For more details, see [Partition data by time](../../data-modeling/common-patterns/timeseries/partitioning-by-time/). -{{}} - -## Use the right data types for partition keys - -In general, integer, arbitrary precision number, character string (not very long ones), and timestamp types are safe choices for comparisons. - -Avoid the following: - -- Floating point number data types - because they are stored as binary float format that cannot represent most of the decimal values precisely, values that are supposedly the same may not be treated as a match because of possible multiple internal representations. - -- Date, time, and similar timestamp component types if they may be compared with values from a different timezone or different day of the year, or when either value comes from a country or region that observes or ever observed daylight savings time. - -## Use multi row inserts wherever possible - -If you're inserting multiple rows, it's faster to batch them together whenever possible. You can start with 128 rows per batch -and test different batch sizes to find the sweet spot. - -Don't use multiple statements: - -```postgresql -INSERT INTO users(name,surname) VALUES ('bill', 'jane'); -INSERT INTO users(name,surname) VALUES ('billy', 'bob'); -INSERT INTO users(name,surname) VALUES ('joey', 'does'); -``` - -Instead, group values into a single statement as follows: - -```postgresql -INSERT INTO users(name,surname) VALUES ('bill', 'jane'), ('billy', 'bob'), ('joe', 'does'); -``` - -## UPSERT multiple rows wherever possible - -PostgreSQL and YSQL enable you to do upserts using the INSERT ON CONFLICT clause. Similar to multi-row inserts, you can also batch multiple upserts in a single INSERT ON CONFLICT statement for better performance. - -In case the row already exists, you can access the existing values using `EXCLUDED.` in the query. - -The following example creates a table to track the quantity of products, and increments rows in batches: - -```postgresql -CREATE TABLE products - ( - name TEXT PRIMARY KEY, - quantity BIGINT DEFAULT 0 - ); ---- -INSERT INTO products(name, quantity) -VALUES - ('apples', 1), - ('oranges', 5) ON CONFLICT(name) DO UPDATE -SET - quantity = products.quantity + excluded.quantity; ---- -INSERT INTO products(name, quantity) -VALUES - ('apples', 1), - ('oranges', 5) ON CONFLICT(name) DO UPDATE -SET - quantity = products.quantity + excluded.quantity; ---- -SELECT * FROM products; - name | quantity ----------+---------- - apples | 2 - oranges | 10 -(2 rows) -``` - -{{}} -For more information, see [Data manipulation](../../../explore/ysql-language-features/data-manipulation). -{{}} - -## Re-use query plans with prepared statements - -Whenever possible, use [prepared statements](../../../api/ysql/the-sql-language/statements/perf_prepare/) to ensure that YugabyteDB can re-use the same query plan and eliminate the need for a server to parse the query on each operation. - -{{}} - -When using server-side pooling, avoid explicit PREPARE and EXECUTE calls and use protocol-level prepared statements instead. Explicit prepare/execute calls can make connections sticky, which prevents you from realizing the benefits of using YSQL Connection Manager{{}} and server-side pooling. - -Depending on your driver, you may have to set some parameters to leverage prepared statements. 
For example, Npgsql supports automatic preparation using the Max Auto Prepare and Auto Prepare Min Usages connection parameters, which you add to your connection string as follows: - -```sh -Max Auto Prepare=100;Auto Prepare Min Usages=5; -``` - -Consult your driver documentation. - -{{}} - -{{}} -For more details, see [Prepared statements in PL/pgSQL](https://dev.to/aws-heroes/postgresql-prepared-statements-in-pl-pgsql-jl3). -{{}} - -## Large scans and batch jobs - -Use BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE for batch or long-running jobs that need a consistent snapshot of the database without interfering with, or being interfered with by, other transactions. - -{{}} -For more details, see [Large scans and batch jobs](../../../develop/learn/transactions/transactions-performance-ysql/#large-scans-and-batch-jobs). -{{}} - -## JSONB datatype - -Use the [JSONB](../../../api/ysql/datatypes/type_json) datatype to model JSON data; that is, data that doesn't have a set schema but has a truly dynamic schema. - -JSONB in YSQL is the same as the [JSONB datatype in PostgreSQL](https://www.postgresql.org/docs/11/datatype-json.html). - -You can use JSONB to group less interesting or less frequently accessed columns of a table. - -YSQL also supports JSONB expression indexes, which can be used to speed up data retrieval that would otherwise require scanning the JSON entries. - -{{< note title="Use JSONB columns only when necessary" >}} - -- A good schema design is to only use JSONB for a truly dynamic schema. That is, don't create a "data JSONB" column where you put everything; instead, create a JSONB column for dynamic data, and use regular columns for the other data. -- JSONB columns are slower to read/write compared to normal columns. -- JSONB values take more space because they need to store keys as strings, and maintaining data consistency is harder, requiring more complex queries to get/set JSONB values. -- JSONB is a good fit when writes are done as a whole document with a per-row hierarchical structure. If there are arrays, the choice is not JSONB versus a column, but JSONB versus additional relational tables. -- For reads, JSONB is a good fit if you read the whole document and the searched expression is indexed. -- When reading one attribute frequently, it's better to move it to a column as it can be included in an index for an `Index Only Scan`. - -{{< /note >}} - -## Parallelizing across tablets - -For large or batch SELECT or DELETE operations that have to scan all tablets, you can parallelize the work by creating queries that each affect only a specific subset of the data using the `yb_hash_code` function. - -{{}} -For more details, see [Distributed parallel queries](../../../api/ysql/exprs/func_yb_hash_code/#distributed-parallel-queries). -{{}} - -## Row size limit - -Big columns add up when you select full or multiple rows. For consistent latency and performance, it is recommended to keep row sizes under 10 MB, with a maximum of 32 MB. - -## Column size limit - -For consistent latency and performance, it is recommended to keep columns in the 2 MB range or less, even though individual columns and rows are supported up to 32 MB.
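A minimal sketch of the `yb_hash_code` approach from the Parallelizing across tablets section above, assuming a hypothetical `events` table whose hash key is `event_id`; each sub-range query would typically be issued from a separate worker.

```sql
-- yb_hash_code(<hash key columns>) returns a value in [0, 65535].
-- Issue one query per sub-range to parallelize a large scan or delete.
SELECT COUNT(*) FROM events
  WHERE yb_hash_code(event_id) >= 0 AND yb_hash_code(event_id) < 8192;

SELECT COUNT(*) FROM events
  WHERE yb_hash_code(event_id) >= 8192 AND yb_hash_code(event_id) < 16384;

-- ...continue through 65535; each sub-range touches a distinct set of tablets.
```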
- -## TRUNCATE tables instead of DELETE - -[TRUNCATE](../../../api/ysql/the-sql-language/statements/ddl_truncate/) deletes the database files that store the table data and is much faster than [DELETE](../../../api/ysql/the-sql-language/statements/dml_delete/), which inserts a _delete marker_ for each row in transactions that are later removed from storage during compaction runs. - -{{}} -Currently, TRUNCATE is not transactional. Also, similar to PostgreSQL, TRUNCATE is not MVCC-safe. For more details, see [TRUNCATE](../../../api/ysql/the-sql-language/statements/ddl_truncate/). -{{}} - -## Minimize the number of tablets you need - -Each table and index is split into tablets and each tablet has overhead. The more tablets you need, the bigger your universe will need to be. See [Allow for tablet replica overheads](../../../best-practices-operations/administration/#allow-for-tablet-replica-overheads) for how the number of tablets affects how big your universe needs to be. - -Each table and index consists of several tablets based on the [--ysql_num_shards_per_tserver](../../../reference/configuration/yb-tserver/#yb-num-shards-per-tserver) flag. - -You can try one of the following methods to reduce the number of tablets: - -- Use [colocation](../../../additional-features/colocation/) to group small tables into 1 tablet. -- Reduce number of tablets-per-table using the [--ysql_num_shards_per_tserver](../../../reference/configuration/yb-tserver/#yb-num-shards-per-tserver) flag. -- Use the [SPLIT INTO](../../../api/ysql/the-sql-language/statements/ddl_create_table/#split-into) clause when creating a table. -- Start with few tablets and use [automatic tablet splitting](../../../architecture/docdb-sharding/tablet-splitting/). - -Note that multiple tablets can allow work to proceed in parallel so you may not want every table to have only one tablet. diff --git a/docs/content/preview/develop/build-global-apps/_index.md b/docs/content/preview/develop/build-global-apps/_index.md deleted file mode 100644 index ea68eccbae2d..000000000000 --- a/docs/content/preview/develop/build-global-apps/_index.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -title: Build global applications -headerTitle: Build global applications -linkTitle: Build global applications -description: Build globally distributed applications. -headcontent: Learn how to design globally distributed applications using simple patterns -menu: - preview: - identifier: build-global-apps - parent: develop - weight: 50 -type: indexpage -showRightNav: true ---- - -{{}} - -Internet and cloud technologies have revolutionized the way people interact and operate. Cloud introduces the possibility of distributing and replicating data across multiple geographies. Accessing and maintaining that globally distributed data demands a new class of global application. - -## The need for global applications - -The reasons for making your applications global are the same as those for adopting a distributed database: - -- **Business continuity and disaster recovery**. Although public clouds have come a long way since the inception of AWS in 2006, region and zone outages are still fairly common, happening once or twice a year (see, for example, [AWS outages](https://en.wikipedia.org/wiki/Timeline_of_Amazon_Web_Services#Amazon_Web_Services_outages) and [Google outages](https://en.wikipedia.org/wiki/Google_services_outages#:~:text=During%20eight%20episodes%2C%20one%20in,Google%20service%20in%20August%202013)). 
To provide uninterrupted service to your users, you need to run your applications in multiple locations. - -- **Data residency for compliance**. To comply with data residency laws (for example, the [GDPR](https://en.wikipedia.org/wiki/General_Data_Protection_Regulation)), you need to ensure that the data of citizens is stored on servers located in their country. This means that you need to design your applications to split data across geographies accordingly. - -- **Moving data closer to users**. When designing an application with global reach (for example, email, e-commerce, or broadcasting events like the Olympics), you need to take into account where your users are located. If your application is hosted in data centers located in the US, users in Europe might encounter high latency when trying to access your application. To provide the best user experience, you need to run your applications closer to your users. - -## Application design patterns - -Running applications in multiple data centers with data split across them is not a trivial task. When designing global applications, you need to answer questions such as: - -- How will multiple instances of the application run in different fault domains (regions/data centers)? -- Will the application instances be identical or different? -- How will these applications be deployed across geo-locations? -- Will each instance operate on the entire dataset or just a subset of the data? -- Will conflicting application updates be allowed? If so, how are these handled? -- How will the application evolve? -- How will the application behave on a fault domain failure? - -To help you answer these questions, use the following architectural concepts to choose a suitable design pattern for your application. - -### Application architecture - -Depending on where the application instances run and which ones are active, choose from the following application architectures: - -- **Single Active** - Only one application instance in a region (or fault domain) is active. The data must be placed close to that application. On failure, the application moves to a different region. -- **Multi-Active** - Applications run in different regions and operate on the entire data set. -- **Read-Only Multi-Active** - Only one application instance is active, while the others can serve stale reads. -- **Partitioned Multi-Active** - Multiple applications run in multiple regions and operate on just a subset of data. - -### Availability architecture - -Depending on whether the application instances operate on the entire dataset or just a subset, and how the application moves on a fault domain failure, choose from the following availability architectures: - -- **Follow the application** - Only one application instance is active, while the others (one or more) can serve stale reads. -- **Geo-local dataset** - Applications act on geographically placed local data. On failure, the application does not move. - -### Data access architecture - -Depending on whether the application should read the latest data or stale data, choose from the following data access architectures: - -- **Consistent reads** - Read from the source of truth, irrespective of latency or location. -- **Follower reads** - Stale reads to achieve lower latency reads. -- **Read replicas** - Allow stale reads but with bounds on how stale data is. - -## Pick the right pattern - -Use the following matrix to choose a [design pattern](#design-patterns), based on the architectures described in the preceding section. 
- -| Pattern Type | Follow the Application | Geo-Local Dataset | -| ---------------------------- | -------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | -| **Single Active** | [Global database](./global-database)
[Active-active single-master](./active-active-single-master) | N/A | -| **Multi Active** | [Global database](./global-database)
[Duplicate indexes](./duplicate-indexes) | [Active-active multi-master](./active-active-multi-master) | -| **Partitioned Multi Active** | [Latency-optimized geo-partitioning](./latency-optimized-geo-partition) | [Locality-optimized geo-partitioning](./locality-optimized-geo-partition) | -| **Data Access Architecture** | [Consistent Reads](./global-database)
[Follower Reads](./follower-reads)
[Read Replicas](./read-replicas) | | - -## Design patterns - -The following table summarizes the design patterns that you can use for your applications. Use these proven patterns to address common problems and accelerate your application development. - -{{}} - -| Pattern Name | Description | -| ------- | ----------- | -| [Global database](./global-database) | -{{
}} Single database spread across multiple regions {{
}} -A database spread across multiple (3 or more) regions or zones. On failure, a replica in another region/zone will be promoted to leader in seconds, without any loss of data. Applications read from source of truth, possibly with higher latencies.| - -|[Duplicate indexes](./duplicate-indexes)| -{{
}} Consistent data everywhere {{
}} -Set up covering indexes with schema the same as the table in multiple regions to read immediately consistent data locally.| - -|[Active‑active single‑master](./active-active-single-master)| -{{
}} Secondary database that can serve reads {{
}} -Set up a second cluster that gets populated asynchronously and can start serving data in case the primary fails. Can also be used for [blue-green](https://en.wikipedia.org/wiki/Blue-green_deployment) deployment testing.| - -|[Active‑active multi‑master](./active-active-multi-master)| -{{
}} Two clusters serving data together {{
}} -Two regions or more, manual failover, a few seconds of data loss (non-zero RPO), low read/write latencies, some caveats on transactional guarantees.| - -|[Latency‑optimized geo‑partitioning](./latency-optimized-geo-partition)| -{{
}} Fast local access {{
}} -Partition your data and place it such that the data belonging to nearby users can be accessed faster.| - -|[Locality‑optimized geo‑partitioning](./locality-optimized-geo-partition)| -{{
}} Local law compliance {{
}} -Partition your data and place it such that the rows belonging to different users are located in their respective countries.| - -|[Follower Reads](./follower-reads) | -{{
}} Fast, stale reads {{
}} -Read from local followers instead of going to the leaders in a different region.| - -|[Read Replicas](./read-replicas) | -{{
}} Fast reads from a read-only cluster {{
}} -Set up a separate cluster of just followers to perform local reads instead of going to the leaders in a different region.| - -{{
}} - -## Legend - -All the illustrations in this section use the following legend to represent tablet leaders and followers, cloud regions and zones, and applications. - -![Global Database - Legend](/images/develop/global-apps/global-database-legend.png) diff --git a/docs/content/preview/develop/build-global-apps/active-active-multi-master.md b/docs/content/preview/develop/build-global-apps/active-active-multi-master.md deleted file mode 100644 index f567cdd135e0..000000000000 --- a/docs/content/preview/develop/build-global-apps/active-active-multi-master.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: Active-Active Multi-Master design pattern for global applications -headerTitle: Active-active multi-master -linkTitle: Active-active multi-master -description: Multi-Master dual cluster for global applications -headcontent: Multi-Master dual cluster using asynchronous xCluster deployment -menu: - preview: - identifier: global-apps-active-active-multi-master - parent: build-global-apps - weight: 400 -type: docs ---- - -For applications that have to be run in multiple regions, you can adopt the **Active-Active Multi-Master** pattern, where you set up two clusters in two different regions, and both clusters actively take responsibility for local reads and writes and replicate data **asynchronously**. In this case, failover is manual and incurs possible loss of data, as the data is asynchronously replicated between the two clusters, but both reads and writes have low latencies. - -{{}} -Application instances are active in multiple regions and may read stale data. -{{}} - -## Overview - -{{}} - -Suppose you have cluster with a replication factor of 3, and applications deployed in `us-west`. - -![RF3 cluster in one region](/images/develop/global-apps/aa-single-master-1region.png) - -This ensures that the reads and writes are in the same region, with the expected low latencies. Because the entire cluster is in a single region, in case of a region failure, you would lose all the data. - -## Multi-master - -You can eliminate the possibility of data loss by setting up another cluster in a different region, say `us-east`, using [xCluster Setup](../../../deploy/multi-dc/async-replication/async-deployment). - -![Active-Active Multi-Master](/images/architecture/replication/active-active-deployment-new.png) - -The `us-east` cluster is independent of the primary cluster in `us-west` and the data is populated by **asynchronous replication**. This means that the read and write latencies of each cluster are not affected by the other, but at the same time, the data in each cluster is not immediately consistent with the other. You can use this pattern to reduce latencies for local users. - -## Transactional consistency - -The **Active-Active Multi-Master** pattern does not guarantee any transactional consistency during the replication between the clusters. Conflicts are resolved in the bi-directional replication by adopting the "last-writer wins" strategy. - -## Failover - -When one of the clusters fails, say `us-west`, the other cluster in `us-east` can handle reads and writes for all applications until the failed region recovers. 
- -![Failover](/images/develop/global-apps/aa-multi-master-failover.png) - -## Learn more - -- [xCluster architecture](../../../architecture/docdb-replication/async-replication) diff --git a/docs/content/preview/develop/build-global-apps/active-active-single-master.md b/docs/content/preview/develop/build-global-apps/active-active-single-master.md deleted file mode 100644 index 08020d2b9e9d..000000000000 --- a/docs/content/preview/develop/build-global-apps/active-active-single-master.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: Active-Active Single-Master design pattern for global applications -headerTitle: Active-active single-master -linkTitle: Active-active single-master -description: An active and a stand by cluster for global applications -headcontent: Active cluster with standby using asynchronous xCluster deployment -menu: - preview: - identifier: global-apps-active-active-single-master - parent: build-global-apps - weight: 500 -type: docs ---- -For applications that run in a single region but need a safety net, you can adopt the **Active-Active Single-Master** pattern. This involves setting up two clusters in different regions. One cluster actively handles all reads and writes, while asynchronously replicating data to the second cluster. The second cluster can serve transactionally consistent but slightly stale reads. In the event of a failure of the first cluster, the second cluster can be promoted to Primary, a process known as **Disaster Recovery**. This setup is particularly useful when you have only two regions, or you want to deploy the database in only one region for low latency writes, while maintaining another copy in the other region for failover and low latency reads. - -{{}} -Only one application instance is actively writing at any time. A replica cluster serves transactionally consistent reads and can be promoted to Primary in case of failure. -{{}} - -## Setup - -{{}} - -Suppose you have cluster with a replication factor of 3, and applications deployed in `us-west`. - -![RF3 cluster in one region](/images/develop/global-apps/aa-single-master-1region.png) - -This ensures that the reads and writes are in the same region, with the expected low latencies. Because the entire cluster is in a single region, in case of a region failure, you would lose all the data. - -## Secondary replica cluster - -You can set up a secondary cluster in a different region, say `us-east`, using [Transactional xCluster Setup](../../../deploy/multi-dc/async-replication/async-transactional-setup-automatic). - -![Active-Active Single Master](/images/develop/global-apps/aa-single-master-setup.png) - -The `us-east` cluster (_Standby_) is independent of the primary cluster in `us-west` and the data is populated by **asynchronous replication** from the `us-west` cluster (_Primary_). This means that the read and write latencies of the Primary cluster are not affected, but at the same time, the data is not immediately available on the Standby cluster. The _Standby_ cluster acts as a **replica cluster** and can take over as Primary in case of a failure. - -This can also be used for [blue-green](https://en.wikipedia.org/wiki/Blue-green_deployment) deployment testing. - -## Local reads - -Because the second cluster has the same schema and the data (with a short lag), it can serve stale but transactionally consistent reads for local applications. - -![Active-Active Single Master](/images/develop/global-apps/aa-single-master-reads.png) - -Writes still have to go to the primary cluster in `us-west`. 
- -## Failover - -When the Primary cluster in `us-west` fails, the Standby cluster in `us-east` can be promoted to become the primary and can start serving both reads and writes. - -![Active-Active Single Master - Failover](/images/develop/global-apps/aa-single-master-failover.png) - -## Learn more - -- [xCluster architecture](../../../architecture/docdb-replication/async-replication) diff --git a/docs/content/preview/develop/build-global-apps/duplicate-indexes.md b/docs/content/preview/develop/build-global-apps/duplicate-indexes.md deleted file mode 100644 index d4213a13306d..000000000000 --- a/docs/content/preview/develop/build-global-apps/duplicate-indexes.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -title: Duplicate Indexes for global applications -headerTitle: Duplicate indexes -linkTitle: Duplicate indexes -description: Enhance the performance of global applications with Duplicate Indexes -headcontent: Enhance the performance of global applications with Duplicate Indexes -menu: - preview: - identifier: global-apps-duplicate-indexes - parent: build-global-apps - weight: 300 -type: docs ---- - -If you have applications running in multiple regions, they still have to go the tablet leaders in the other regions for reads and writes. Although writes must always go to the tablet leaders, the speed of reads can be improved by using [follower reads](../follower-reads) or [read replicas](../read-replicas). But in both of these setups, the replicas may not be up-to-date with the latest data, resulting in stale reads. This may not be acceptable for some applications. - -This is where **Duplicate Indexes** come in handy. Duplicate indexes guarantee immediately consistent reads in multiple regions. This section describes how applications can benefit from this pattern, and the associated costs. - -{{}} -Application instances are active in all regions and do consistent reads with the lowest latency. -{{}} - -## Overview - -Suppose you have an RF 3 [Global Database](../global-database) spread across `us-east`, `us-central`, and `us-west`, your application is running in `us-east`, and so you have set the leader preference to `us-east`. - -{{}} - -![RF3 Global Database](/images/develop/global-apps/duplicate-indexes-global-database.png) - -Adding an application in `us-central` gives the following setup. - -![RF3 Global Database](/images/develop/global-apps/duplicate-indexes-central-app.png) - -In this scenario, the application in `us-central` has a read latency of 30 ms, whereas the application in `us-east` has a read latency of only 2 ms. - -{{}} -Reduce the 30 ms access latency of applications in `us-central`. -{{}} - -This becomes worse when you add an application in `us-west`. - -![RF3 Global Database](/images/develop/global-apps/duplicate-indexes-west-app.png) - -The application in `us-west` has a high read latency of 60 ms. - -{{}} -Reduce the 60 ms access latency of applications in `us-west` -{{}} - -## Duplicate indexes - -By default, all reads go to the leader, so even though the replicas are available in other regions, applications incur cross-region latency if the leaders are in a different region than the application. - -To address this, you can create multiple [covering indexes](../../../explore/ysql-language-features/indexes-constraints/covering-index-ysql/) with the same schema as the table, and attach them to different tablespaces, with leader preference set to each region. - -To set this up, do the following: - -1. 
Create a basic table of users, which has the `id`, `name`, and `city` for each user. - - ```plpgsql - CREATE TABLE users ( - id INTEGER NOT NULL, - name VARCHAR, - city VARCHAR - ); - ``` - -1. Create multiple tablespaces (one for every region you want your index leader to be located in) and set leader preference to that region: - - {{}} -Even though the leader preference is set to a region, you should place the replicas in other regions for outage scenarios, depending on your application setup. - {{}} - - ```plpgsql - -- tablespace for west data - CREATE TABLESPACE west WITH ( - replica_placement= '{ - "num_replicas" : 3, - "placement_blocks" : [ - {"cloud":"aws","region":"us-west","zone":"us-west-1a","leader_preference": 1,"min_num_replicas":1}, - {"cloud":"aws","region":"us-east","zone":"us-east-1a","min_num_replicas":1}, - {"cloud":"aws","region":"us-central","zone":"us-central-1a","min_num_replicas":1} - ]}'); - - -- tablespace for central data - CREATE TABLESPACE central WITH ( - replica_placement= '{ - "num_replicas" : 3, - "placement_blocks" : [ - {"cloud":"aws","region":"us-west","zone":"us-west-1a","min_num_replicas":1}, - {"cloud":"aws","region":"us-east","zone":"us-east-1a","min_num_replicas":1}, - {"cloud":"aws","region":"us-central","zone":"us-central-1a","leader_preference": 1,"min_num_replicas":1} - ]}'); - - -- tablespace for east data - CREATE TABLESPACE east WITH ( - replica_placement= '{ - "num_replicas" : 3, - "placement_blocks" : [ - {"cloud":"aws","region":"us-west","zone":"us-west-1a","min_num_replicas":1}, - {"cloud":"aws","region":"us-east","zone":"us-east-1a","leader_preference": 1,"min_num_replicas":1}, - {"cloud":"aws","region":"us-central","zone":"us-central-1a","min_num_replicas":1} - ]}'); - ``` - -1. Create multiple duplicate indexes and attach them to region-level tablespaces. - - ```plpgsql - CREATE INDEX idx_west ON users (name) INCLUDE (id, city) TABLESPACE west; - CREATE INDEX idx_east ON users (name) INCLUDE (id, city) TABLESPACE east; - CREATE INDEX idx_central ON users (name) INCLUDE (id, city) TABLESPACE central; - ``` - -This creates three clones of the covering index, with leaders in different regions, and at the same time replicated in the other regions. The following illustration shows the result. - -![Duplicate indexes](/images/develop/global-apps/duplicate-indexes-create.png) - -## Reduced read latency - -Consider the query plan to fetch the `id` and `city` for a user `John Wick` for the application running in `us-west`: - -```plpgsql -explain analyze select id, city from users where name = 'John Wick' ; -``` - -```output - QUERY PLAN ------------------------------------------------------------------------------------ - Index Only Scan using idx_west on users (actual time=2.274..2.274 rows=1 loops=1) - Index Cond: (name = 'John Wick'::text) - Heap Fetches: 0 - Planning Time: 0.225 ms - Execution Time: 2.386 ms - Peak Memory Usage: 8 kB -``` - -Because you have added all of the columns needed for your queries as part of the covering index, the query executor doesn't have to go to the tablet leader (in a different region) to fetch the data. The **geo-aware query planner** will prefer to use the index (`idx_west`), whose leaders are local to the region, when querying. Note that the read latency is just ~2.2 ms instead of the original ~60 ms. - -{{}} -The query planner optimizations related to picking the right index by taking into consideration the leader preference of the tablespace in which the index lives are available in v2.17.3 and later. 
-{{}} - -![Duplicate indexes](/images/develop/global-apps/duplicate-indexes-read-latencies.png) - -All the applications now read locally with a reduced read latency of 2 ms. When you set up your cluster using duplicate indexes, it has the effect of having consistent leaders for the table in each region. - -## Increased write latency - -The following illustration shows the write latencies. - -![Duplicate indexes](/images/develop/global-apps/duplicate-indexes-write-latencies.png) - -The write latencies have increased because each write has to update the tablet leader, its replicas, and three index leaders and their replicas. Effectively you are sacrificing write latency to achieve highly reduced read latency. - -## Failover - -In the case of zone or region failures, followers in other regions are elected leaders and the applications connect to the closest region automatically as shown in the following illustration. - -![Duplicate indexes failover](/images/develop/global-apps/duplicate-indexes-failover.png) - -## Learn more - -- [Tablespaces](../../../explore/going-beyond-sql/tablespaces/) -- [Covering Indexes](../../../explore/ysql-language-features/indexes-constraints/covering-index-ysql/) -- [Create Index](../../../api/ysql/the-sql-language/statements/ddl_create_index/) diff --git a/docs/content/preview/develop/build-global-apps/follower-reads.md b/docs/content/preview/develop/build-global-apps/follower-reads.md deleted file mode 100644 index 7f2b574a6c14..000000000000 --- a/docs/content/preview/develop/build-global-apps/follower-reads.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: Follower Reads for global applications -headerTitle: Follower reads -linkTitle: Follower reads -description: Reduce read latency using Follower reads -headcontent: Reduce read latency for global applications -menu: - preview: - identifier: global-apps-follower-reads - parent: build-global-apps - weight: 800 -rightNav: - hideH3: true - hideH4: true -type: docs ---- - -When applications run in multiple regions, they incur cross-region latency to read the latest data from the leader, even though there could be a follower present locally. This is because all followers may not have the latest data at the read time. - -In some scenarios, however, reading from the leader is not necessary. For example: - -- The data does not change often (for example, movie database). -- The application does not need the latest data (for example, yesterday's report). - -If a little staleness for reads is acceptable for the application running in other regions, then **Follower Reads** is the pattern to adopt. - -{{}} -Multiple application instances are active and some instances read stale data. -{{}} - -## Setup - -{{}} - -Suppose you have a [Global Database](../global-database) set up across 3 regions `us-east`, `us-central`, and `us-west`, with leader preference set to `us-east`. Suppose further that you want to run applications in all 3 regions. Read latencies would be similar to the following illustration. - -![Global Apps - setup](/images/develop/global-apps/global-apps-follower-reads-setup.png) - -## Follower reads - -Enable follower reads for your application using the following statements: - -```plpgsql -SET session characteristics as transaction read only; -SET yb_read_from_followers = true; -``` - -This allows the application to read data from the closest follower (or leader). 
- -![Follower reads - setup](/images/develop/global-apps/global-apps-follower-reads-final.png) - -In this scenario, the read latency for the application in `us-west` drops drastically to 2 ms from the initial 60 ms, and the read latency of the application in `us-central` also drops to 2 ms. - -As replicas may not be up-to-date (by design), this might return slightly stale data (the default staleness is 30 seconds). This is the case even if the read goes to a leader. - -You can change the staleness value using the following YSQL configuration parameter: - -```plpgsql -SET yb_follower_read_staleness_ms = 10000; -- 10s -``` - -Although the default is recommended, you can set the staleness to a shorter value. The tradeoff is the shorter the staleness, the more likely some reads may be redirected to the leader if the follower isn't sufficiently caught up. You shouldn't set `yb_follower_read_staleness_ms` to less than 2x the [raft_heartbeat_interval_ms](../../../reference/configuration/yb-tserver/#raft-heartbeat-interval-ms) (which by default is 500 ms). - -{{}} -Follower reads only affect reads. All writes are still handled by the leader. -{{}} - -## Failover - -When the follower in a region fails, the application redirects its reads to the next closest follower/leader. - -![Follower reads - Failover](/images/develop/global-apps/global-apps-follower-reads-failover.png) - -Notice how the application in `us-west` reads from the follower in `us-central` when the follower in `us-west` has failed. The read latency is 40 ms, still much less than the original 60 ms. - -## Learn more - -- [Follower reads](../../../explore/going-beyond-sql/follower-reads-ysql/) diff --git a/docs/content/preview/develop/build-global-apps/global-database.md b/docs/content/preview/develop/build-global-apps/global-database.md deleted file mode 100644 index 1c490ea78080..000000000000 --- a/docs/content/preview/develop/build-global-apps/global-database.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: Global database pattern for global applications -headerTitle: Global database -linkTitle: Global database -description: Build highly available global applications -headcontent: Design highly-available applications using a global database -menu: - preview: - identifier: global-apps-global-database - parent: build-global-apps - weight: 210 -rightNav: - hideH3: true - hideH4: true -type: docs ---- - -For many applications, a single-region multi-zone deployment may suffice. But global applications that are designed to serve users across multiple geographies and be highly available have to be deployed in multiple regions. - -To be ready for region failures and be highly available, you can set up YugabyteDB as a cluster that spans multiple regions. This stretch cluster is known as a **Global Database**. - -{{}} -Application is active in one region at a time and does consistent reads. -{{}} - -## Setup - -Suppose you want your cluster distributed across three regions (`us-east`, `us-central`, and `us-west`) and that you are going to run your application in `us-east` with failover set to `us-central`. To do this, you set up a cluster with a replication factor (RF) of 5, with two replicas of the data in the primary and failover regions and the last copy in the third region. - -{{}} -Although you could use an RF 3 cluster, an RF 5 cluster provides quicker failover; with two replicas in the preferred regions, when a leader fails, a local follower can be elected as a leader, rather than a follower in a different region. -{{}} - - -{{