From d589c0818337f9c6d59bd187481bd7bd822e49d5 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 31 Jan 2025 00:41:58 +0100 Subject: [PATCH 1/3] landing pages for Getting Started page --- docs/en/concepts/index.md | 17 +++ docs/en/faq/index.md | 15 +++ docs/en/guides/inserting-data.md | 3 +- docs/en/guides/writing-queries.md | 5 +- docs/en/introduction-index.md | 15 +++ docs/en/managing-data/core-concepts/index.md | 2 +- .../bigquery/equivalent-concepts.md | 2 +- docs/en/migrations/bigquery/index.md | 15 +++ docs/en/migrations/index.md | 16 +++ docs/en/migrations/postgres/index.md | 18 +++ docs/en/starter-guides/index.md | 16 +++ docs/en/use-cases/index.md | 12 ++ docs/en/use-cases/observability/index.md | 107 +++--------------- .../use-cases/observability/introduction.md | 100 ++++++++++++++++ docusaurus.config.js | 4 + sidebars.js | 20 ++-- 16 files changed, 258 insertions(+), 109 deletions(-) create mode 100644 docs/en/concepts/index.md create mode 100644 docs/en/faq/index.md create mode 100644 docs/en/introduction-index.md create mode 100644 docs/en/migrations/bigquery/index.md create mode 100644 docs/en/migrations/index.md create mode 100644 docs/en/migrations/postgres/index.md create mode 100644 docs/en/starter-guides/index.md create mode 100644 docs/en/use-cases/index.md create mode 100644 docs/en/use-cases/observability/introduction.md diff --git a/docs/en/concepts/index.md b/docs/en/concepts/index.md new file mode 100644 index 00000000000..2ac7f939a7d --- /dev/null +++ b/docs/en/concepts/index.md @@ -0,0 +1,17 @@ +--- +title: Concepts +slug: /en/concepts +description: Landing page for concepts +pagination_next: null +pagination_prev: null +--- + +In this section of the docs we'll dive into the concepts around what makes ClickHouse so fast and efficient. 
+ +| Page | Description | +|------------------------------------------------------------------|---------------------------------------------------------------------------------------| +| [Why is ClickHouse so Fast?](./why-clickhouse-is-so-fast.md) | Learn what makes ClickHouse so fast. +| [What is OLAP?](./olap.md) | Learn what Online Analytical Processing is. +| [Why is ClickHouse unique?](../about-us/distinctive-features.md) | Learn what makes ClickHouse unique. +| [Glossary](./glossary.md) | This page contains a glossary of terms you'll commonly encounter throughout the docs. +| [FAQ](../faq/index.md) | A compilation of the most frequently asked questions we get about ClickHouse. diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md new file mode 100644 index 00000000000..bd8a4a2fce8 --- /dev/null +++ b/docs/en/faq/index.md @@ -0,0 +1,15 @@ +--- +slug: /en/concepts/faq +title: FAQ +description: Landing page for FAQ +pagination_prev: null +pagination_next: null +--- + +| Page | Description | +|---------------------------------------------------------------|----------------------------------------------------------------------------------------| +| [General Questions about ClickHouse](general/index.md) | General questions we get about ClickHouse. | +| [Why not use something like MapReduce?](general/mapreduce.md) | Explainer on why MapReduce implementations are not appropriate for the OLAP scenario. | +| [What does "не тормозит" mean](general/ne-tormozit.md) | Explainer on what "не тормозит" means, which you may have seen on ClickHouse t-shirts. | +| [What is OLAP](general/olap.md) | Explainer on what Online Analytical Processing is. | +| [Who is using ClickHouse](general/who-is-using-clickhouse.md) | Learn about who is using ClickHouse. 
| \ No newline at end of file diff --git a/docs/en/guides/inserting-data.md b/docs/en/guides/inserting-data.md index dccdcfbe183..56fc1760e4b 100644 --- a/docs/en/guides/inserting-data.md +++ b/docs/en/guides/inserting-data.md @@ -1,7 +1,8 @@ --- -title: Inserting Data +title: Inserting ClickHouse data description: How to insert data into ClickHouse keywords: [insert, insert data, insert into table] +sidebar_label: Inserting ClickHouse data --- ## Basic Example diff --git a/docs/en/guides/writing-queries.md b/docs/en/guides/writing-queries.md index 711a952ae1c..30d75902e9a 100644 --- a/docs/en/guides/writing-queries.md +++ b/docs/en/guides/writing-queries.md @@ -1,10 +1,9 @@ --- sidebar_position: 3 -sidebar_label: SELECT Queries +sidebar_label: Selecting data +title: Selecting ClickHouse Data --- -# SELECT Queries in ClickHouse - ClickHouse is a SQL database, and you query your data by writing the same type of `SELECT` queries you are already familiar with. For example: ```sql diff --git a/docs/en/introduction-index.md b/docs/en/introduction-index.md new file mode 100644 index 00000000000..0cb77ff450f --- /dev/null +++ b/docs/en/introduction-index.md @@ -0,0 +1,15 @@ +--- +slug: /en/introduction +title: Introduction +description: Landing page for Introduction +pagination_next: null +--- + +Welcome to ClickHouse! Check out the pages below to learn how to get up and running with ClickHouse - the fastest and most resource efficient real-time data warehouse and open-source database. + +| Page | Description | +|-------------------------------------------|--------------------------------------------------------------------| +| [What is ClickHouse?](about-us/intro.mdx) | Learn more about what ClickHouse is. | +| [Quick Start](quick-start.mdx) | Quick start guide to get you up and running in no time. | +| [Advanced Tutorial](tutorial.md) | Comfortable with the basics? Let's do something more interesting. 
| +| [Install](getting-started/install.md) | Learn about the various ways you can install ClickHouse. | diff --git a/docs/en/managing-data/core-concepts/index.md b/docs/en/managing-data/core-concepts/index.md index d9aeb597b28..c62f0fc47c8 100644 --- a/docs/en/managing-data/core-concepts/index.md +++ b/docs/en/managing-data/core-concepts/index.md @@ -1,5 +1,5 @@ --- -slug: /en/concepts +slug: /en/managing-data/core-concepts title: Core Concepts description: Learn Core Concepts of how ClickHouse works keywords: [concepts, part, partition, primary index] diff --git a/docs/en/migrations/bigquery/equivalent-concepts.md b/docs/en/migrations/bigquery/equivalent-concepts.md index 530d8cf610c..5d1b9e51eeb 100644 --- a/docs/en/migrations/bigquery/equivalent-concepts.md +++ b/docs/en/migrations/bigquery/equivalent-concepts.md @@ -1,6 +1,6 @@ --- title: BigQuery vs ClickHouse Cloud -slug: /en/migrations/bigquery +slug: /en/migrations/bigquery/bigquery-vs-clickhouse-cloud description: How BigQuery differs from ClickHouse Cloud keywords: [migrate, migration, migrating, data, etl, elt, BigQuery] --- diff --git a/docs/en/migrations/bigquery/index.md b/docs/en/migrations/bigquery/index.md new file mode 100644 index 00000000000..3b86ffb4662 --- /dev/null +++ b/docs/en/migrations/bigquery/index.md @@ -0,0 +1,15 @@ +--- +slug: /en/migrations/bigquery +title: BigQuery +pagination_prev: null +pagination_next: null +--- + +In this section of the docs, learn more about the similarities and differences between BigQuery and ClickHouse Cloud, as well as why you might want to migrate and how to do so. 
| Page | Description | +|-----------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------| +| [BigQuery vs ClickHouse Cloud](./equivalent-concepts.md) | The way resources are organized in ClickHouse Cloud is similar to BigQuery's resource hierarchy. We describe the specific differences in this article. | +| [Migrating from BigQuery to ClickHouse Cloud](./migrating-to-clickhouse-cloud.md) | Learn about why you might want to migrate from BigQuery to ClickHouse Cloud. | +| [Loading Data](./loading-data.md) | A guide showing you how to migrate data from BigQuery to ClickHouse. | + diff --git a/docs/en/migrations/index.md b/docs/en/migrations/index.md new file mode 100644 index 00000000000..fdff4cf9927 --- /dev/null +++ b/docs/en/migrations/index.md @@ -0,0 +1,16 @@ +--- +slug: /en/migrations +title: Migrations +pagination_prev: null +pagination_next: null +--- + +| Page | Description | +|-------------------------------------------------------------------|--------------------------------| +| [BigQuery](bigquery/index.md) | Migration guide for BigQuery | +| [Snowflake](./snowflake.md) | Migration guide for Snowflake | +| [PostgreSQL](postgres/index.md) | Migration guide for PostgreSQL | +| [MySQL](../integrations/data-ingestion/dbms/mysql/index.md) | Migration guide for MySQL | +| [Redshift](../integrations/data-ingestion/redshift/index.md) | Migration guide for Redshift | +| [DynamoDB](../integrations/data-ingestion/dbms/dynamodb/index.md) | Migration guide for DynamoDB | +| [Rockset](../integrations/migration/rockset.md) | Migration guide for Rockset | diff --git a/docs/en/migrations/postgres/index.md b/docs/en/migrations/postgres/index.md new file mode 100644 index 00000000000..22190011575 --- /dev/null +++ b/docs/en/migrations/postgres/index.md @@ -0,0 +1,18 @@ +--- +slug: 
/en/migrations/postgresql +pagination_prev: null +pagination_next: null +title: PostgreSQL +--- + +| Page | Description | +|--------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Introduction](./overview.md) | Introduction page for this section | +| [Connecting to PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/connecting-to-postgresql.md) | This page covers the following options for integrating PostgreSQL with ClickHouse: ClickPipes, PeerDB, PostgreSQL table engine, MaterializedPostgreSQL database engine. | +| [Comparing PostgreSQL and ClickHouse](../../integrations/data-ingestion/dbms/postgresql/postgres-vs-clickhouse.md) | Explores Postgres vs ClickHouse - Equivalent and different concepts. | +| [Loading data from PostgreSQL to ClickHouse](./dataset.md) | Part 1 of a guide on migrating from PostgreSQL to ClickHouse. | +| [Designing Schemas](./designing-schemas.md) | Part 2 of a guide on migrating from PostgreSQL to ClickHouse. | +| [Data modeling techniques](./data-modeling-techniques.md) | Part 3 of a guide on migrating from PostgreSQL to ClickHouse. | +| [Rewrite PostgreSQL Queries](../../integrations/data-ingestion/dbms/postgresql/rewriting-postgres-queries.md) | Part 4 of a guide on migrating from PostgreSQL to ClickHouse. | +| [How to insert data from PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/inserting-data.md) | Learn how to bulk load data from PostgreSQL to ClickHouse. | +| [Data Type Mappings for PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/data-type-mappings.md) | Table showing the equivalent ClickHouse data types for Postgres. 
| diff --git a/docs/en/starter-guides/index.md b/docs/en/starter-guides/index.md new file mode 100644 index 00000000000..fdce2fb4219 --- /dev/null +++ b/docs/en/starter-guides/index.md @@ -0,0 +1,16 @@ +--- +slug: /en/starter-guides +title: Starter Guides +description: Landing page for starter guides +pagination_prev: null +pagination_next: null +--- + +In this section of the docs you'll find starter guides for common SQL queries: `CREATE`, `INSERT`, `SELECT`, and mutations `UPDATE` and `DELETE`. + +| Page | Description | +|------------------------------------------------------------|------------------------------------------------------------------------| +| [Create Tables](../guides/creating-tables.md) | Starter guide on how to create a table. | +| [Insert Data](../guides/inserting-data.md) | Starter guide on how to insert data into a table. | +| [Select Data](../guides/writing-queries.md) | Starter guide on how to select data from a table. | +| [Update and Delete Data](../guides/developer/mutations.md) | Starter guide on mutations - updating and deleting data in ClickHouse. | \ No newline at end of file diff --git a/docs/en/use-cases/index.md b/docs/en/use-cases/index.md new file mode 100644 index 00000000000..344d6d01246 --- /dev/null +++ b/docs/en/use-cases/index.md @@ -0,0 +1,12 @@ +--- +slug: /en/use-cases +title: Use Case Guides +pagination_prev: null +pagination_next: null +--- + +In this section of the docs you can find our use case guides. 
+ +| Page | Description | +|-----------------------------------------|---------------------------------------------------------------------| +| [Observability](observability/index.md) | Use case guide on how to setup and use ClickHouse for Observability | \ No newline at end of file diff --git a/docs/en/use-cases/observability/index.md b/docs/en/use-cases/observability/index.md index ff2209e197f..04510f04772 100644 --- a/docs/en/use-cases/observability/index.md +++ b/docs/en/use-cases/observability/index.md @@ -1,100 +1,19 @@ --- +slug: /en/use-cases/observability title: Observability -description: Using ClickHouse as an observability solution -slug: /en/observability -keywords: [observability, logs, traces, metrics, OpenTelemetry, Grafana, OTel] +pagination_prev: null +pagination_next: null --- -# Using ClickHouse for Observability +Welcome to our Observability use case guide. In this guide you'll learn how you can get setup and use ClickHouse for Observability. -## Introduction +Navigate to the pages below to explore the different sections of this guide. -This guide is designed for users looking to build their own SQL-based Observability solution using ClickHouse, focusing on logs and traces. This covers all aspects of building your own solution including considerations for ingestion, optimizing schemas for your access patterns and extracting structure from unstructured logs. - -ClickHouse alone is not an out-of-the-box solution for Observability. It can, however, be used as a highly efficient storage engine for Observability data, capable of unrivaled compression rates and lightning-fast query response times. In order for users to use ClickHouse within an Observability solution, both a user interface and data collection framework are required. We currently recommend using **Grafana** for visualization of Observability signals and **OpenTelemetry** for data collection (both are officially supported integrations). - -NEEDS ALT - -
- -:::note Not just OpenTelemetry -While our recommendation is to use the OpenTelemetry (OTel) project for data collection, similar architectures can be produced using other frameworks and tools e.g. Vector and Fluentd (see [an example](https://clickhouse.com/blog/kubernetes-logs-to-clickhouse-fluent-bit) with Fluent Bit). Alternative visualization tools also exist including Superset and Metabase. -::: - -## Why use ClickHouse? - -The most important feature of any centralized Observability store is its ability to quickly aggregate, analyze, and search through vast amounts of log data from diverse sources. This centralization streamlines troubleshooting, making it easier to pinpoint the root causes of service disruptions. - -With users increasingly price-sensitive and finding the cost of these out-of-the-box offerings to be high and unpredictable in comparison to the value they bring, cost-efficient and predictable log storage, where query performance is acceptable, is more valuable than ever. - -Due to its performance and cost efficiency, ClickHouse has become the de facto standard for logging and tracing storage engines in observability products. - -More specifically, the following means ClickHouse is ideally suited for the storage of observability data: - -- **Compression** - Observability data typically contains fields for which the values are taken from a distinct set e.g. HTTP codes or service names. ClickHouse’s column-oriented storage, where values are stored sorted, means this data compresses extremely well - especially when combined with a range of specialized codecs for time-series data. Unlike other data stores, which require as much storage as the original data size of the data, typically in JSON format, ClickHouse compresses logs and traces on average up to 14x. Beyond providing significant storage savings for large Observability installations, this compression assists in accelerating queries as less data needs to be read from disk. 
-- **Fast Aggregations** - Observability solutions typically heavily involve the visualization of data through charts e.g. lines showing error rates or bar charts showing traffic sources. Aggregations, or GROUP BYs, are fundamental to powering these charts which must also be fast and responsive when applying filters in workflows for issue diagnosis. ClickHouse's column-oriented format combined with a vectorized query execution engine is ideal for fast aggregations, with sparse indexing allowing rapid filtering of data in response to users' actions. -- **Fast Linear scans** - While alternative technologies rely on inverted indices for fast querying of logs, these invariably result in high disk and resource utilization. While ClickHouse provides inverted indices as an additional optional index type, linear scans are highly parallelized and use all of the available cores on a machine (unless configured otherwise). This potentially allows 10s of GB/s per second (compressed) to be scanned for matches with [highly optimized text-matching operators](/en/sql-reference/functions/string-search-functions). -- **Familiarity of SQL** - SQL is the ubiquitous language with which all engineers are familiar. With over 50 years of development, it has proven itself as the de facto language for data analytics and remains the [3rd most popular programming language](https://clickhouse.com/blog/the-state-of-sql-based-observability#lingua-franca). Observability is just another data problem for which SQL is ideal. -- **Analytical functions** - ClickHouse extends ANSI SQL with analytical functions designed to make SQL queries simple and easier to write. These are essential for users performing root cause analysis where data needs to be sliced and diced. -- **Secondary indices** - ClickHouse supports secondary indexes, such as bloom filters, to accelerate specific query profiles. 
These can be optionally enabled at a column level, giving the user granular control and allowing them to assess the cost-performance benefit. -- **Open-source & Open standards** - As an open-source database, ClickHouse embraces open standards such as Open Telemetry. The ability to contribute and actively participate in projects is appealing while avoiding the challenges of vendor lock-in. - -## When should you use ClickHouse for Observability - -Using ClickHouse for observability data requires users to embrace SQL-based observability. We recommend [this blog post](https://clickhouse.com/blog/the-state-of-sql-based-observability) for a history of SQL-based observability, but in summary: - -SQL-based observability is for you if: - -- You or your team(s) are familiar with SQL (or want to learn it) -- You prefer adhering to open standards like OpenTelemetry to avoid lock-in and achieve extensibility. -- You are willing to run an ecosystem fueled by open-source innovation from collection to storage and visualization. -- You envision some growth to medium or large volumes of observability data under management (or even very large volumes) -- You want to be in control of the TCO (total cost of ownership) and avoid spiraling observability costs. -- You can't or don't want to get stuck with small data retention periods for your observability data just to manage the costs. - -SQL-based observability may not be for you if: - -- Learning (or generating!) SQL is not appealing to you or your team(s). -- You are looking for a packaged, end-to-end observability experience. -- Your observability data volumes are too small to make any significant difference (e.g. <150 GiB) and are not forecasted to grow. -- Your use case is metrics-heavy and needs PromQL. In that case, you can still use ClickHouse for logs and tracing beside Prometheus for metrics, unifying it at the presentation layer with Grafana. 
-- You prefer to wait for the ecosystem to mature more and SQL-based observability to get more turnkey. - -## Logs and traces - -The Observability use case has three distinct pillars: Logging, Tracing, and Metrics. Each has distinct data types and access patterns. - -We currently recommend ClickHouse for storing two types of observability data: - -- **Logs** - Logs are time-stamped records of events occurring within a system, capturing detailed information about various aspects of software operations. The data in logs is typically unstructured or semi-structured and can include error messages, user activity logs, system changes, and other events. Logs are crucial for troubleshooting, anomaly detection, and understanding the specific events leading up to issues within the system. - -``` -54.36.149.41 - - [22/Jan/2019:03:56:14 +0330] "GET -/filter/27|13%20%D9%85%DA%AF%D8%A7%D9%BE%DB%8C%DA%A9%D8%B3%D9%84,27|%DA%A9%D9%85%D8%AA%D8%B1%20%D8%A7%D8%B2%205%20%D9%85%DA%AF%D8%A7%D9%BE%DB%8C%DA%A9%D8%B3%D9%84,p53 HTTP/1.1" 200 30577 "-" "Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)" "-" -``` - -- **Traces** - Traces capture the journey of requests as they traverse through different services in a distributed system, detailing the path and performance of these requests. The data in traces is highly structured, consisting of spans and traces that map out each step a request takes, including timing information. Traces provide valuable insights into system performance, helping identify bottlenecks, latency issues, and optimize the efficiency of microservices. - -:::note Metrics -While ClickHouse can be used to store metrics data, this pillar is less mature in ClickHouse with pending support for features such as support for the Prometheus data format and PromQL. -::: - -### Distributed Tracing - -Distributed tracing is a critical feature of Observability. A distributed trace, simply called a trace, maps the journey of a request through a system. 
The request will originate from an end user or application and proliferate throughout a system, typically resulting in a flow of actions between microservices. By recording this sequence, and allowing the subsequent events to be correlated, it allows an observability user or SRE to be able to diagnose issues in an application flow irrespective of how complex or serverless the architecture is. - -Each trace consists of several spans, with the initial span associated with the request known as the root span. This root span captures the entire request from beginning to end. Subsequent spans beneath the root provide detailed insights into the various steps or operations that occur during the request. Without tracing, diagnosing performance issues in a distributed system can be extremely difficult. Tracing eases the process of debugging and comprehending distributed systems by detailing the sequence of events within a request as it moves through the system. - -Most observability vendors visualize this information as a waterfall, with relative timing shown using horizontal bars of proportional size. For example, in Grafana: - -NEEDS ALT - -
- -For users needing to familiarize themselves deeply with the concepts of logs and traces, we highly recommend the [OpenTelemetry documentation](https://opentelemetry.io/docs/concepts/). +| Page | Description | +|-------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Introduction](./introduction.md) | This guide is designed for users looking to build their own SQL-based Observability solution using ClickHouse, focusing on logs and traces. | +| [Schema design](./schema-design.md) | Learn why users are recommended to create their own schema for logs and traces, along with some best practices for doing so. | +| [Managing data](./managing-data.md) | Deployments of ClickHouse for Observability invariably involve large datasets, which need to be managed. ClickHouse offers a number of features to assist with data management. | +| [Integrating OpenTelemetry](./integrating-opentelemetry.md) | Any Observability solution requires a means of collecting and exporting logs and traces. For this purpose, ClickHouse recommends the OpenTelemetry (OTel) project. Learn more about how to integrate it with ClickHouse. | +| [Using Grafana](./grafana.md) | Learn how to use Grafana, the preferred visualization tool for Observability data in ClickHouse, with ClickHouse. +| [Demo Application](./demo-application.md) | The Open Telemetry project includes a demo application. 
A maintained fork of this application with ClickHouse as a data source for logs and traces can be found linked on this page.| \ No newline at end of file diff --git a/docs/en/use-cases/observability/introduction.md b/docs/en/use-cases/observability/introduction.md new file mode 100644 index 00000000000..a810d9da6dd --- /dev/null +++ b/docs/en/use-cases/observability/introduction.md @@ -0,0 +1,100 @@ +--- +title: Introduction +description: Using ClickHouse as an observability solution +slug: /en/use-cases/observability/introduction +keywords: [observability, logs, traces, metrics, OpenTelemetry, Grafana, OTel] +--- + +# Using ClickHouse for Observability + +## Introduction + +This guide is designed for users looking to build their own SQL-based Observability solution using ClickHouse, focusing on logs and traces. This covers all aspects of building your own solution including considerations for ingestion, optimizing schemas for your access patterns and extracting structure from unstructured logs. + +ClickHouse alone is not an out-of-the-box solution for Observability. It can, however, be used as a highly efficient storage engine for Observability data, capable of unrivaled compression rates and lightning-fast query response times. In order for users to use ClickHouse within an Observability solution, both a user interface and data collection framework are required. We currently recommend using **Grafana** for visualization of Observability signals and **OpenTelemetry** for data collection (both are officially supported integrations). + +NEEDS ALT + +
+ +:::note Not just OpenTelemetry +While our recommendation is to use the OpenTelemetry (OTel) project for data collection, similar architectures can be produced using other frameworks and tools e.g. Vector and Fluentd (see [an example](https://clickhouse.com/blog/kubernetes-logs-to-clickhouse-fluent-bit) with Fluent Bit). Alternative visualization tools also exist including Superset and Metabase. +::: + +## Why use ClickHouse? + +The most important feature of any centralized Observability store is its ability to quickly aggregate, analyze, and search through vast amounts of log data from diverse sources. This centralization streamlines troubleshooting, making it easier to pinpoint the root causes of service disruptions. + +With users increasingly price-sensitive and finding the cost of these out-of-the-box offerings to be high and unpredictable in comparison to the value they bring, cost-efficient and predictable log storage, where query performance is acceptable, is more valuable than ever. + +Due to its performance and cost efficiency, ClickHouse has become the de facto standard for logging and tracing storage engines in observability products. + +More specifically, the following means ClickHouse is ideally suited for the storage of observability data: + +- **Compression** - Observability data typically contains fields for which the values are taken from a distinct set e.g. HTTP codes or service names. ClickHouse’s column-oriented storage, where values are stored sorted, means this data compresses extremely well - especially when combined with a range of specialized codecs for time-series data. Unlike other data stores, which require as much storage as the original data size of the data, typically in JSON format, ClickHouse compresses logs and traces on average up to 14x. Beyond providing significant storage savings for large Observability installations, this compression assists in accelerating queries as less data needs to be read from disk. 
+- **Fast Aggregations** - Observability solutions typically heavily involve the visualization of data through charts e.g. lines showing error rates or bar charts showing traffic sources. Aggregations, or GROUP BYs, are fundamental to powering these charts which must also be fast and responsive when applying filters in workflows for issue diagnosis. ClickHouse's column-oriented format combined with a vectorized query execution engine is ideal for fast aggregations, with sparse indexing allowing rapid filtering of data in response to users' actions. +- **Fast Linear scans** - While alternative technologies rely on inverted indices for fast querying of logs, these invariably result in high disk and resource utilization. While ClickHouse provides inverted indices as an additional optional index type, linear scans are highly parallelized and use all of the available cores on a machine (unless configured otherwise). This potentially allows 10s of GB/s per second (compressed) to be scanned for matches with [highly optimized text-matching operators](/en/sql-reference/functions/string-search-functions). +- **Familiarity of SQL** - SQL is the ubiquitous language with which all engineers are familiar. With over 50 years of development, it has proven itself as the de facto language for data analytics and remains the [3rd most popular programming language](https://clickhouse.com/blog/the-state-of-sql-based-observability#lingua-franca). Observability is just another data problem for which SQL is ideal. +- **Analytical functions** - ClickHouse extends ANSI SQL with analytical functions designed to make SQL queries simple and easier to write. These are essential for users performing root cause analysis where data needs to be sliced and diced. +- **Secondary indices** - ClickHouse supports secondary indexes, such as bloom filters, to accelerate specific query profiles. 
These can be optionally enabled at a column level, giving the user granular control and allowing them to assess the cost-performance benefit. +- **Open-source & Open standards** - As an open-source database, ClickHouse embraces open standards such as Open Telemetry. The ability to contribute and actively participate in projects is appealing while avoiding the challenges of vendor lock-in. + +## When should you use ClickHouse for Observability + +Using ClickHouse for observability data requires users to embrace SQL-based observability. We recommend [this blog post](https://clickhouse.com/blog/the-state-of-sql-based-observability) for a history of SQL-based observability, but in summary: + +SQL-based observability is for you if: + +- You or your team(s) are familiar with SQL (or want to learn it) +- You prefer adhering to open standards like OpenTelemetry to avoid lock-in and achieve extensibility. +- You are willing to run an ecosystem fueled by open-source innovation from collection to storage and visualization. +- You envision some growth to medium or large volumes of observability data under management (or even very large volumes) +- You want to be in control of the TCO (total cost of ownership) and avoid spiraling observability costs. +- You can't or don't want to get stuck with small data retention periods for your observability data just to manage the costs. + +SQL-based observability may not be for you if: + +- Learning (or generating!) SQL is not appealing to you or your team(s). +- You are looking for a packaged, end-to-end observability experience. +- Your observability data volumes are too small to make any significant difference (e.g. <150 GiB) and are not forecasted to grow. +- Your use case is metrics-heavy and needs PromQL. In that case, you can still use ClickHouse for logs and tracing beside Prometheus for metrics, unifying it at the presentation layer with Grafana. 
+- You prefer to wait for the ecosystem to mature more and SQL-based observability to get more turnkey.
+
+## Logs and traces
+
+The Observability use case has three distinct pillars: Logging, Tracing, and Metrics. Each has distinct data types and access patterns.
+
+We currently recommend ClickHouse for storing two types of observability data:
+
+- **Logs** - Logs are time-stamped records of events occurring within a system, capturing detailed information about various aspects of software operations. The data in logs is typically unstructured or semi-structured and can include error messages, user activity logs, system changes, and other events. Logs are crucial for troubleshooting, anomaly detection, and understanding the specific events leading up to issues within the system.
+
+```
+54.36.149.41 - - [22/Jan/2019:03:56:14 +0330] "GET
+/filter/27|13%20%D9%85%DA%AF%D8%A7%D9%BE%DB%8C%DA%A9%D8%B3%D9%84,27|%DA%A9%D9%85%D8%AA%D8%B1%20%D8%A7%D8%B2%205%20%D9%85%DA%AF%D8%A7%D9%BE%DB%8C%DA%A9%D8%B3%D9%84,p53 HTTP/1.1" 200 30577 "-" "Mozilla/5.0 (compatible; AhrefsBot/6.1;
+http://ahrefs.com/robot/)" "-"
+```
+
+- **Traces** - Traces capture the journey of requests as they traverse through different services in a distributed system, detailing the path and performance of these requests. The data in traces is highly structured, consisting of spans and traces that map out each step a request takes, including timing information. Traces provide valuable insights into system performance, helping to identify bottlenecks and latency issues, and to optimize the efficiency of microservices.
+
+:::note Metrics
+While ClickHouse can be used to store metrics data, this pillar is less mature in ClickHouse, with support for features such as the Prometheus data format and PromQL still pending.
+:::
+
+### Distributed Tracing
+
+Distributed tracing is a critical feature of Observability. A distributed trace, simply called a trace, maps the journey of a request through a system. 
The request will originate from an end user or application and propagate throughout a system, typically resulting in a flow of actions between microservices. By recording this sequence and allowing the subsequent events to be correlated, it allows an observability user or SRE to diagnose issues in an application flow, irrespective of how complex or serverless the architecture is.
+
+Each trace consists of several spans, with the initial span associated with the request known as the root span. This root span captures the entire request from beginning to end. Subsequent spans beneath the root provide detailed insights into the various steps or operations that occur during the request. Without tracing, diagnosing performance issues in a distributed system can be extremely difficult. Tracing eases the process of debugging and comprehending distributed systems by detailing the sequence of events within a request as it moves through the system.
+
+Most observability vendors visualize this information as a waterfall, with relative timing shown using horizontal bars of proportional size. For example, in Grafana:
+
+NEEDS ALT
+
+
+ +For users needing to familiarize themselves deeply with the concepts of logs and traces, we highly recommend the [OpenTelemetry documentation](https://opentelemetry.io/docs/concepts/). diff --git a/docusaurus.config.js b/docusaurus.config.js index 01118ced983..5d4b1a08551 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -2399,6 +2399,10 @@ const config = { { from: '/en/optimize', to: '/en/operations/overview' + }, + { + from: '/en/observability', + to: '/en/use-cases/observability/introduction' } ], }, diff --git a/sidebars.js b/sidebars.js index 8342950d404..136f625f513 100644 --- a/sidebars.js +++ b/sidebars.js @@ -11,6 +11,7 @@ const sidebars = { label: "Introduction", collapsed: false, collapsible: false, + link: {type: "doc", id: "en/introduction-index"}, items: [ "en/intro", "en/quick-start", @@ -24,9 +25,10 @@ const sidebars = { className: "top-nav-item", collapsed: false, collapsible: false, + link: {type: "doc", id: "en/concepts/index"}, items: [ - "en/concepts/why-clickhouse-is-so-fast", "en/concepts/olap", + "en/concepts/why-clickhouse-is-so-fast", "en/about-us/distinctive-features", "en/concepts/glossary", { @@ -34,13 +36,13 @@ const sidebars = { label: "FAQ", collapsed: true, collapsible: true, + link: {type: "doc", id: "en/faq/index"}, items: [ "en/faq/general/index", "en/faq/general/mapreduce", "en/faq/general/ne-tormozit", "en/faq/general/olap", "en/faq/general/who-is-using-clickhouse", - "en/concepts/why-clickhouse-is-so-fast", ], } ], @@ -50,6 +52,7 @@ const sidebars = { label: "Starter Guides", collapsed: false, collapsible: false, + link: {type: "doc", id: "en/starter-guides/index"}, items: [ "en/guides/creating-tables", "en/guides/inserting-data", @@ -62,18 +65,16 @@ const sidebars = { label: "Use Case Guides", collapsed: false, collapsible: false, + link: {type: "doc", id: "en/use-cases/index"}, items: [ { type: "category", label: "Observability", collapsed: true, collapsible: true, + link: {type: "doc", id: 
"en/use-cases/observability/index"}, items: [ - { - type: "doc", - label: "Introduction", - id: "en/use-cases/observability/index", - }, + "en/use-cases/observability/introduction", "en/use-cases/observability/schema-design", "en/use-cases/observability/managing-data", "en/use-cases/observability/integrating-opentelemetry", @@ -88,12 +89,12 @@ const sidebars = { label: "Migration Guides", collapsed: false, collapsible: false, + link: {type: "doc", id: "en/migrations/index"}, items: [ { type: "category", label: "BigQuery", - collapsed: true, - collapsible: true, + link: {type: "doc", id: "en/migrations/bigquery/index"}, items: [ { type: "doc", @@ -115,6 +116,7 @@ const sidebars = { label: "PostgreSQL", collapsed: true, collapsible: true, + link: {type: "doc", id: "en/migrations/postgres/index"}, items: [ { type: "doc", From e459519c45a1099f433a9e664d539433fa39dcc7 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 31 Jan 2025 11:17:17 +0100 Subject: [PATCH 2/3] change slug so that it doesn't overwrite an existing route --- docs/en/introduction-index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction-index.md b/docs/en/introduction-index.md index 0cb77ff450f..690417070fa 100644 --- a/docs/en/introduction-index.md +++ b/docs/en/introduction-index.md @@ -1,5 +1,5 @@ --- -slug: /en/introduction +slug: /en/introduction-clickhouse title: Introduction description: Landing page for Introduction pagination_next: null From 6973a7448e2d01f14db3493742eb2a4dc840f2dd Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 31 Jan 2025 17:28:01 +0100 Subject: [PATCH 3/3] review fixes --- docs/en/migrations/postgres/index.md | 22 +++++++++++----------- docusaurus.config.js | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/en/migrations/postgres/index.md b/docs/en/migrations/postgres/index.md index 22190011575..0e69cb63771 
100644 --- a/docs/en/migrations/postgres/index.md +++ b/docs/en/migrations/postgres/index.md @@ -5,14 +5,14 @@ pagination_next: null title: PostgreSQL --- -| Page | Description | -|--------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - [Introduction](./overview.md) | Introduction page for this section | - [Connecting to PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/connecting-to-postgresql.md) | This page covers the following options for integrating PostgreSQL with ClickHouse: ClickPipes, PeerDB, PostgreSQL table engine, MaterializedPostgreSQL database engine. | - [Comparing PostgreSQL and ClickHouse](../../integrations/data-ingestion/dbms/postgresql/postgres-vs-clickhouse.md) | Explores Postgres vs ClickHouse - Equivalent and different concepts. | - [Loading data from PostgreSQL to ClickHouse](./dataset.md) | Part 1 of a guide on migrating from PostgreSQL to ClickHouse. - [Designing Schemas](./designing-schemas.md) | Part 2 of a guide on migrating from PostgreSQL to ClickHouse. | - [Data modeling techniques](./data-modeling-techniques.md) | Part 3 of a guide on migrating from PostgreSQL to ClickHouse. | - [Rewrite PostgreSQL Queries](../../integrations/data-ingestion/dbms/postgresql/rewriting-postgres-queries.md) | Part 4 of a guide on migrating from PostgreSQL to ClickHouse. | - [How to insert data from PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/inserting-data.md) | Learn how to bulk load data from PostgreSQL to ClickHouse. | - [Data Type Mappings for PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/data-type-mappings.md) | Table showing the equivalent ClickHouse data types for Postgres. 
| +| Page | Description | +|----------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Introduction](./overview.md) | Introduction page for this section | +| [Connecting to PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/connecting-to-postgresql.md) | This page covers the following options for integrating PostgreSQL with ClickHouse: ClickPipes, PeerDB, PostgreSQL table engine, MaterializedPostgreSQL database engine. | +| [Comparing PostgreSQL and ClickHouse](../../integrations/data-ingestion/dbms/postgresql/postgres-vs-clickhouse.md) | Explores Postgres vs ClickHouse - Equivalent and different concepts. | +| [Loading data from PostgreSQL to ClickHouse](./dataset.md) | Part 1 of a guide on migrating from PostgreSQL to ClickHouse. | +| [Designing Schemas](./designing-schemas.md) | Part 2 of a guide on migrating from PostgreSQL to ClickHouse. | +| [Data modeling techniques](./data-modeling-techniques.md) | Part 3 of a guide on migrating from PostgreSQL to ClickHouse. | +| [Rewrite PostgreSQL Queries](../../integrations/data-ingestion/dbms/postgresql/rewriting-postgres-queries.md) | Part 4 of a guide on migrating from PostgreSQL to ClickHouse. | +| [How to insert data from PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/inserting-data.md) | Learn how to bulk load data from PostgreSQL to ClickHouse. | +| [Data Type Mappings for PostgreSQL](../../integrations/data-ingestion/dbms/postgresql/data-type-mappings.md) | Table showing the equivalent ClickHouse data types for Postgres. 
| diff --git a/docusaurus.config.js b/docusaurus.config.js index 5d4b1a08551..f9fe6c79b13 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -2402,7 +2402,7 @@ const config = { }, { from: '/en/observability', - to: '/en/use-cases/observability/introduction' + to: '/en/use-cases/observability' } ], },