Commit f954ba7

update main

2 parents 382aa1f + 95b645f

File tree: 20 files changed (+547, -159 lines)

Lines changed: 74 additions & 0 deletions (new file: a GitHub Actions workflow)

# This GitHub Action is used for triggering updates of
# the toc.json files present in any directory that
# needs an automatically generated table of contents.

name: Generate Table of Contents files

env:
  # Force the stdout and stderr streams to be unbuffered
  PYTHONUNBUFFERED: 1

on:
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

permissions:
  contents: write

jobs:
  generate_toc_formats:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Check out the repository
      - name: Check out repository
        uses: actions/checkout@v3

      # Step 2: Set up Python
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.x'

      # Step 3: Install Python dependencies
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r 'scripts/table-of-contents-generator/requirements.txt'

      # Step 4: Pull main repo docs, run script to generate TOCs
      - name: Generate TOCs
        id: toc_gen               # required by the outcome check in Step 5
        continue-on-error: true   # let Step 5 run and fail the job explicitly
        run: |
          yarn prep-from-master
          python -u ./scripts/table-of-contents-generator/toc_gen.py --dir="docs/en/interfaces/formats" --single-toc --out="table-of-contents-files" --ignore "_snippets"

      # Step 5: Fail the workflow if the script returned exit code 1
      - name: Check exit code
        run: |
          if [[ "${{ steps.toc_gen.outcome }}" == "failure" ]]; then
            echo "Ran into trouble generating a table of contents. See the logs for details."
            exit 1
          fi

      # Step 6: Check if anything was actually updated
      - name: Check for Changes
        id: check_changes
        run: |
          git status -u
          if [[ -n "$(git diff --exit-code)" ]]; then
            echo "Changes detected."
            echo "has_changes=true" >> $GITHUB_OUTPUT
          else
            echo "No changes detected."
            echo "has_changes=false" >> $GITHUB_OUTPUT
          fi

      # Step 7: Commit and push generated Table of Contents files
      - uses: stefanzweifel/git-auto-commit-action@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          # note: the $(date ...) below is not shell-expanded in a `with:` value;
          # it will appear literally in the commit message
          commit_message: "Autogenerate table of contents files from GitHub action - $(date '+%Y-%m-%d %H:%M:%S')"
          file_pattern: 'table-of-contents-files/*'
          branch: generate_table_of_contents
          create_branch: true
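
For anyone testing this locally, the generation step can be reproduced from a repo checkout. A minimal sketch, assuming the script path and flags shown in the workflow above and a Python 3 environment (in CI this is preceded by `yarn prep-from-master` to pull the docs):

```bash
# Local dry run of the TOC generation step; paths and flags are copied
# verbatim from the workflow above, and a docs checkout is assumed.
python -m pip install --upgrade pip
pip install -r scripts/table-of-contents-generator/requirements.txt
python -u ./scripts/table-of-contents-generator/toc_gen.py \
  --dir="docs/en/interfaces/formats" \
  --single-toc \
  --out="table-of-contents-files" \
  --ignore "_snippets"
```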

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -49,3 +49,6 @@ docs/en/cloud/manage/api/services-api-reference.md
 .vscode
 .aspell.en.prepl
 *.md.bak
+
+# Don't ignore generated table of contents files
+!toc.json
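
The `!toc.json` line is a gitignore negation pattern: it re-includes files named toc.json that an earlier, broader pattern would otherwise ignore. A quick way to check how the rules resolve for a generated file (the path below is illustrative, borrowed from the workflow's --out directory):

```bash
# Ask git which ignore rule, if any, matches the generated TOC file.
# check-ignore exits non-zero when the file is not ignored, hence the fallback echo.
git check-ignore -v table-of-contents-files/toc.json || echo "not ignored"
```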

copyClickhouseRepoDocs.sh

Lines changed: 7 additions & 4 deletions
@@ -31,10 +31,13 @@ echo "[$SCRIPT_NAME] Copying completed"

 echo "[$SCRIPT_NAME] Generate changelog"
 cp docs/en/_placeholders/changelog/_index.md docs/en/whats-new/changelog/index.md
-if grep -q '^# $(date +%Y) Changelog' ClickHouse/CHANGELOG.md; then
-    sed '/^# $(date +%Y) Changelog/d' ClickHouse/CHANGELOG.md > temp.txt
-    cat >> docs/en/whats-new/changelog/index.md
-    rm temp.txt
+if grep -q "^# $(date +%Y) Changelog" ClickHouse/CHANGELOG.md; then
+    echo "Generating $(date +%Y) Changelog..."
+    sed "/^# $(date +%Y) Changelog/d" ClickHouse/CHANGELOG.md > temp.txt
+    echo "Changelog copied to temp.txt"
+    cat temp.txt >> docs/en/whats-new/changelog/index.md
+    echo "Changelog written to docs/en/whats-new/changelog/index.md"
+    rm -f temp.txt
     echo "$(date +%Y) Changelog was updated."
 else
     current_year="$(date +%Y)"
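
The quoting change is the substantive fix in this hunk: command substitution such as `$(date +%Y)` expands inside double quotes but stays literal inside single quotes, so the old grep/sed patterns could never match a real year heading (and the old `cat >>` read from stdin rather than from temp.txt, which `cat temp.txt >>` corrects). A two-line illustration of the quoting behaviour:

```bash
# Single quotes keep $(date +%Y) literal; double quotes expand it.
echo '^# $(date +%Y) Changelog'   # prints: ^# $(date +%Y) Changelog
echo "^# $(date +%Y) Changelog"   # prints, e.g.: ^# 2025 Changelog
```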

docs/en/about-us/history.md

Lines changed: 10 additions & 9 deletions
@@ -2,16 +2,17 @@
 slug: /en/about-us/history
 sidebar_label: ClickHouse History
 sidebar_position: 40
-description: Where it all began...
+description: History of ClickHouse development
+tags: ['history', 'development', 'Metrica']
 ---

 # ClickHouse History {#clickhouse-history}

-ClickHouse has been developed initially to power [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all), and continues to be the core component of this system. With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows generating custom reports on the fly directly from non-aggregated data. This article briefly covers the goals of ClickHouse in the early stages of its development.
+ClickHouse was initially developed to power [Yandex.Metrica](https://metrica.yandex.com/), [the second largest web analytics platform in the world](http://w3techs.com/technologies/overview/traffic_analysis/all), and continues to be its core component. With more than 13 trillion records in the database and more than 20 billion events daily, ClickHouse allows generating custom reports on the fly directly from non-aggregated data. This article briefly covers the goals of ClickHouse in the early stages of its development.

-Yandex.Metrica builds customized reports on the fly based on hits and sessions, with arbitrary segments defined by the user. Doing so often requires building complex aggregates, such as the number of unique users. New data for building a report arrives in real-time.
+Yandex.Metrica builds customized reports on the fly based on hits and sessions, with arbitrary segments defined by the user. Doing so often requires building complex aggregates, such as the number of unique users, with new data for building reports arriving in real-time.

-As of April 2014, Yandex.Metrica was tracking about 12 billion events (page views and clicks) daily. All these events must be stored to build custom reports. A single query may require scanning millions of rows within a few hundred milliseconds, or hundreds of millions of rows in just a few seconds.
+As of April 2014, Yandex.Metrica was tracking about 12 billion events (page views and clicks) daily. All these events needed to be stored in order to build custom reports. A single query may have required scanning millions of rows within a few hundred milliseconds, or hundreds of millions of rows in just a few seconds.

 ## Usage in Yandex.Metrica and Other Yandex Services {#usage-in-yandex-metrica-and-other-yandex-services}

@@ -26,30 +27,30 @@ ClickHouse also plays a key role in the following processes:
 - Running queries for debugging the Yandex.Metrica engine.
 - Analyzing logs from the API and the user interface.

-Nowadays, there are multiple dozen ClickHouse installations in other Yandex services and departments: search verticals, e-commerce, advertisement, business analytics, mobile development, personal services, and others.
+Nowadays, there are several dozen ClickHouse installations in other Yandex services and departments: search verticals, e-commerce, advertisement, business analytics, mobile development, personal services, and others.

 ## Aggregated and Non-aggregated Data {#aggregated-and-non-aggregated-data}

 There is a widespread opinion that to calculate statistics effectively, you must aggregate data since this reduces the volume of data.

-But data aggregation comes with a lot of limitations:
+However, data aggregation comes with a lot of limitations:

 - You must have a pre-defined list of required reports.
 - The user can’t make custom reports.
 - When aggregating over a large number of distinct keys, the data volume is barely reduced, so aggregation is useless.
 - For a large number of reports, there are too many aggregation variations (combinatorial explosion).
 - When aggregating keys with high cardinality (such as URLs), the volume of data is not reduced by much (less than twofold).
 - For this reason, the volume of data with aggregation might grow instead of shrink.
-- Users do not view all the reports we generate for them. A large portion of those calculations is useless.
-- The logical integrity of data may be violated for various aggregations.
+- Users do not view all the reports we generate for them. A large portion of those calculations are useless.
+- The logical integrity of the data may be violated for various aggregations.

 If we do not aggregate anything and work with non-aggregated data, this might reduce the volume of calculations.

 However, with aggregation, a significant part of the work is taken offline and completed relatively calmly. In contrast, online calculations require calculating as fast as possible, since the user is waiting for the result.

 Yandex.Metrica has a specialized system for aggregating data called Metrage, which was used for the majority of reports.
 Starting in 2009, Yandex.Metrica also used a specialized OLAP database for non-aggregated data called OLAPServer, which was previously used for the report builder.
-OLAPServer worked well for non-aggregated data, but it had many restrictions that did not allow it to be used for all reports as desired. These included the lack of support for data types (only numbers), and the inability to incrementally update data in real-time (it could only be done by rewriting data daily). OLAPServer is not a DBMS, but a specialized DB.
+OLAPServer worked well for non-aggregated data, but it had many restrictions that did not allow it to be used for all reports as desired. These included a lack of support for data types (numbers only), and the inability to incrementally update data in real-time (it could only be done by rewriting data daily). OLAPServer is not a DBMS, but a specialized DB.

 The initial goal for ClickHouse was to remove the limitations of OLAPServer and solve the problem of working with non-aggregated data for all reports, but over the years, it has grown into a general-purpose database management system suitable for a wide range of analytical tasks.

docs/en/cloud/reference/cloud-compatibility.md

Lines changed: 1 addition & 15 deletions
@@ -122,19 +122,5 @@ ClickHouse Cloud is tuned for variable workloads, and for that reason most syste
 As part of creating the ClickHouse service, we create a default database, and the default user that has broad permissions to this database. This initial user can create additional users and assign their permissions to this database. Beyond this, the ability to enable the following security features within the database using Kerberos, LDAP, or SSL X.509 certificate authentication are not supported at this time.

 ## Roadmap
-The table below summarizes our efforts to expand some of the capabilities described above. If you have feedback, please [submit it here](mailto:[email protected]).

-| Capability | Status |
-|------------|:-------|
-| Dictionary support: PostgreSQL, MySQL, remote and local ClickHouse servers, Redis, MongoDB and HTTP sources | **Added in GA** |
-| SQL user-defined functions (UDFs) | **Added in GA** |
-| MySQL and PostgreSQL engine | **Added in GA** |
-| MySQL interface | **Added in GA** |
-| Redis | Coming soon |
-| Postgres interfaces | Evaluating demand |
-| Engines for SQLite, ODBC, HDFS, and Hive | Evaluating demand |
-| Protobuf, Cap'n'Proto formats | Evaluating demand |
-| Kafka Table Engine | Not recommended; see alternatives above |
-| JDBC Table Engine | Not recommended |
-| EmbeddedRocksDB Engine | Evaluating demand |
-| Executable user-defined functions | Evaluating demand |
+The table below summarizes our efforts to expand some of the capabilities described above. If you have feedback, please [submit it here](mailto:[email protected]).

docs/en/guides/developer/lightweight-update.md

Lines changed: 2 additions & 6 deletions
@@ -5,12 +5,8 @@ title: Lightweight Update
 keywords: [lightweight update]
 ---

-import CloudAvailableBadge from '@theme/badges/CloudAvailableBadge';
-
 ## Lightweight Update

-<CloudAvailableBadge/>
-
 When lightweight updates are enabled, updated rows are marked as updated immediately and subsequent `SELECT` queries will automatically return with the changed values. When lightweight updates are not enabled, you may have to wait for your mutations to be applied via a background process to see the changed values.

 Lightweight updates can be enabled for `MergeTree`-family tables by enabling the query-level setting `apply_mutations_on_fly`.
@@ -23,7 +19,7 @@ SET apply_mutations_on_fly = 1;

 Let's create a table and run some mutations:
 ```sql
-CREATE TABLE test_on_fly_mutations (id UInt64, v String) 
+CREATE TABLE test_on_fly_mutations (id UInt64, v String)
 ENGINE = MergeTree ORDER BY id;

 -- Disable background materialization of mutations to showcase
@@ -93,4 +89,4 @@ These behaviours are controlled by the following settings:
 - `mutations_execute_nondeterministic_on_initiator` - if true, non-deterministic functions are executed on the initiator replica and are replaced as literals in `UPDATE` and `DELETE` queries. Default value: `false`.
 - `mutations_execute_subqueries_on_initiator` - if true, scalar subqueries are executed on the initiator replica and are replaced as literals in `UPDATE` and `DELETE` queries. Default value: `false`.
 - `mutations_max_literal_size_to_replace` - The maximum size of serialized literals in bytes to replace in `UPDATE` and `DELETE` queries. Default value: `16384` (16 KiB).
-
+
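
To see the documented behaviour end to end, here is a hedged sketch using clickhouse-client against the `test_on_fly_mutations` table from the page above (a local server, the table, and a row with `id = 1` are assumptions):

```bash
# Illustrative only: enable on-the-fly mutations for the session, issue an
# update mutation, then read the value back before background materialization.
clickhouse-client --multiquery --query "
SET apply_mutations_on_fly = 1;
ALTER TABLE test_on_fly_mutations UPDATE v = 'updated' WHERE id = 1;
SELECT v FROM test_on_fly_mutations WHERE id = 1;
"
```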

docs/en/integrations/cli.mdx

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ sidebar_position: 30
 sidebar_label: clickhouse-client
 title: clickhouse-client
 slug: /en/integrations/sql-clients/cli
+displayed_sidebar: integrations
 ---

 import Content from '@site/docs/en/interfaces/cli.md';

docs/en/integrations/language-clients/java/client-v1.md

Lines changed: 2 additions & 10 deletions
@@ -1,21 +1,13 @@
----
-sidebar_label: Client V1
-sidebar_position: 3
-keywords: [clickhouse, java, client, integrate]
-description: Java ClickHouse Connector v1
-slug: /en/integrations/java/client-v1
----
-
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 import CodeBlock from '@theme/CodeBlock';

-# Client (V1)
+# Client (0.7.x and earlier)

 Java client library to communicate with a DB server thru its protocols. Current implementation supports only [HTTP interface](/docs/en/interfaces/http). The library provides own API to send requests to a server.

 :::warning Deprecation
-This library will be deprecated soon. Use Client-v2 for new projects
+This library will be deprecated soon. Use the latest [Java Client](/docs/en/integrations/language-clients/java/client-v2.md) for new projects.
 :::

 ## Setup

docs/en/integrations/language-clients/java/client-v2.md

Lines changed: 8 additions & 3 deletions
@@ -1,8 +1,8 @@
 ---
-sidebar_label: Client V2
+sidebar_label: Client 0.8+
 sidebar_position: 2
 keywords: [clickhouse, java, client, integrate]
-description: Java ClickHouse Connector v2
+description: Java ClickHouse Connector 0.8+
 slug: /en/integrations/java/client-v2
 ---

@@ -12,7 +12,12 @@ import CodeBlock from '@theme/CodeBlock';

 # Java Client (V2)

-Java client library to communicate with a DB server through its protocols. The current implementation only supports the [HTTP interface](/docs/en/interfaces/http). The library provides its own API to send requests to a server. The library also provides tools to work with different binary data formats (RowBinary* & Native*).
+Java client library to communicate with a DB server through its protocols. The current implementation only supports the [HTTP interface](/docs/en/interfaces/http).
+The library provides its own API to send requests to a server. The library also provides tools to work with different binary data formats (RowBinary* & Native*).
+
+:::note
+If you're looking for a prior version of the Java client docs, please see [here](/docs/en/integrations/language-clients/java/client-v1.md).
+:::

 ## Setup
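
Since the client only talks to ClickHouse over the HTTP interface, a quick connectivity check from the shell can save a debugging round-trip before wiring up the Java client (localhost and the default HTTP port 8123 are assumptions):

```bash
# Verify the HTTP interface the Java client depends on; host and port are assumed.
curl 'http://localhost:8123/?query=SELECT%20version()'
```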

docs/en/integrations/language-clients/java/index.md

Lines changed: 2 additions & 3 deletions
@@ -11,9 +11,8 @@ import CodeBlock from '@theme/CodeBlock';

 # Java Clients Overview

-- [Client-V2](./client-v2.md)
-- [Client-V1 (Old)](./client-v1.md)
-- [JDBC Driver](./jdbc-driver.md)
+- [Client 0.8+](./client-v2.md)
+- [JDBC 0.8+](./jdbc-v2.md)
 - [R2DBC Driver](./r2dbc.md)

 ## ClickHouse Client
