initial changes

dhtclk · dhtclk · commit 14fa6cbe7046 · 2025-07-21T11:03:40.000-05:00
diff --git a/src/components/GlossaryTooltip/generate-tooltips.py b/src/components/GlossaryTooltip/generate-tooltips.py
@@ -0,0 +1,90 @@
+import os
+import re
+import json
+
+# Locate the glossary and the docs tree relative to this script rather than
+# one developer's home directory, so the tool runs from any checkout.
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+GLOSSARY_JSON_PATH = os.path.join(_SCRIPT_DIR, "glossary.json")
+# The script lives in src/components/GlossaryTooltip/, three levels below the
+# repository root that holds docs/.
+DOCS_PATH = os.path.normpath(os.path.join(_SCRIPT_DIR, "..", "..", "..", "docs"))
+
+# Directory names that are pruned from the walk over DOCS_PATH.
+IGNORE_DIRS = {
+    "changelog", "changelogs", "i18n", "scripts", "static", "styles",
+    "contribute", "about-us", "_placeholders"
+}
+
+# Import statement added to every file that receives at least one tooltip.
+GLOSSARY_IMPORT = "import GlossaryTooltip from '@site/src/components/GlossaryTooltip/GlossaryTooltip.jsx';"
+
+def load_glossary(path):
+    """Parse the UTF-8 JSON file at *path* and return the decoded object."""
+    with open(path, mode='r', encoding='utf-8') as handle:
+        raw = handle.read()
+    return json.loads(raw)
+
+def mask_ignores(text):
+    """Replace regions that must not receive tooltips with placeholder keys.
+
+    Patterns are applied in the order listed, so a later pattern can swallow
+    placeholders produced by an earlier one (for example a header that
+    contains inline code). Restoring the text therefore has to expand the
+    most recently created placeholders first.
+
+    Args:
+        text: Raw Markdown/MDX source.
+
+    Returns:
+        A ``(masked_text, placeholders)`` tuple. ``placeholders`` maps each
+        ``__MASKED_<NAME>_<i>__`` key to the exact text it replaced, in
+        creation order.
+    """
+    placeholders = {}
+    patterns = {
+        'codeblocks': r'```[\s\S]*?```',
+        'inline_code': r'`[^`\n]+`',
+        # \A pins front matter to the very start of the file; with a plain ^
+        # under MULTILINE any two `---` horizontal rules were treated as front
+        # matter and everything between them was masked.
+        'frontmatter': r'\A---[\s\S]+?---',
+        'imports': r'^import .*?;$',
+        'headers': r'^(#+ .*)$',
+        'html_blocks': r'<(div|details|summary)[\s\S]*?<\/\1>',
+        'blockquotes': r'^\s*>.*$',
+        'links': r'\[([^\]]+)\]\([^)]+\)',
+        'images': r'!\[[^\]]*\]\([^)]+\)',
+        'comments': r'<!--[\s\S]*?-->',
+    }
+
+    def make_stasher(name):
+        """Build a substitution callback that numbers matches of one pattern."""
+        prefix = f"__MASKED_{name.upper()}_"
+        seen = 0
+
+        def stash(match):
+            nonlocal seen
+            key = f"{prefix}{seen}__"
+            seen += 1
+            placeholders[key] = match.group(0)
+            return key
+
+        return stash
+
+    for name, pattern in patterns.items():
+        # Substitute at the matched positions in a single pass. A global
+        # str.replace on the matched text would also mask identical text the
+        # pattern never matched, and rescans the document once per match.
+        text = re.compile(pattern, re.MULTILINE).sub(make_stasher(name), text)
+
+    return text, placeholders
+
+def unmask_ignores(text, placeholders):
+    """Restore the original text for every placeholder key in *text*.
+
+    Args:
+        text: Text containing placeholder keys.
+        placeholders: Mapping of placeholder key to original text, in the
+            order the placeholders were created.
+
+    Returns:
+        The text with all placeholders expanded.
+    """
+    # Expand the newest placeholders first. A placeholder created later can
+    # contain earlier keys in its stored text (e.g. a masked header holding a
+    # masked inline-code key); in creation order those inner keys would be
+    # looked up before they become visible and would leak into the output.
+    for key, value in reversed(list(placeholders.items())):
+        text = text.replace(key, value)
+    return text
+
+def inject_tooltips(text, glossary):
+    """Wrap each glossary term found in *text* in a ``<GlossaryTooltip>`` tag.
+
+    Matching is case-sensitive and bounded by ``\\b`` on both sides.
+
+    Args:
+        text: Source text, normally with protected regions already masked.
+        glossary: Mapping of term to definition string.
+
+    Returns:
+        The text with every matched term replaced by a tooltip element.
+        Terms whose definition is empty are left untouched.
+    """
+    import html  # local import: only this function needs it
+
+    if not glossary:
+        # An empty alternation would match the empty string at every word
+        # boundary; there is nothing to inject anyway.
+        return text
+
+    def as_attribute(value):
+        """Escape *value* for use inside a double-quoted JSX attribute."""
+        return html.escape(value, quote=False).replace('"', '&quot;')
+
+    def replacement(match):
+        word = match.group(0)
+        definition = glossary.get(word)
+        if not definition:
+            return word
+        return (
+            f'<GlossaryTooltip term="{as_attribute(word)}" '
+            f'definition="{as_attribute(definition)}">{word}</GlossaryTooltip>'
+        )
+
+    # Regex alternation takes the first branch that matches, so list longer
+    # terms first; otherwise "Shard" would always win over "Shard key".
+    terms = sorted(glossary, key=len, reverse=True)
+    pattern = r'\b(' + '|'.join(re.escape(term) for term in terms) + r')\b'
+    return re.sub(pattern, replacement, text)
+
+def _insert_glossary_import(text):
+    """Return *text* with GLOSSARY_IMPORT added below any leading front matter."""
+    front_matter = re.match(r'---[ \t]*\n(?:[\s\S]*?\n)?---[ \t]*(?:\n|$)', text)
+    if not front_matter:
+        # Blank line after the import so the MDX parser ends the ESM block
+        # before the Markdown content starts.
+        return GLOSSARY_IMPORT + "\n\n" + text
+
+    head = text[:front_matter.end()]
+    if not head.endswith("\n"):
+        head += "\n"
+    body = text[front_matter.end():].lstrip("\n")
+    # Front matter has to stay the first thing in the file, so the import
+    # goes directly after its closing fence.
+    return head + "\n" + GLOSSARY_IMPORT + "\n\n" + body
+
+
+def process_file(path, glossary):
+    """Inject glossary tooltips into one Markdown file.
+
+    Protected regions are masked, glossary terms are wrapped in tooltip
+    tags, and the masked regions are restored. If the result contains a
+    tooltip and differs from the input, it is written next to the source
+    with an ``.mdx`` extension and the source file is removed; otherwise the
+    file is left alone. A one-line status is printed either way.
+
+    Args:
+        path: Path of the Markdown file to process.
+        glossary: Mapping of term to definition string.
+    """
+    with open(path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    masked_text, placeholders = mask_ignores(content)
+    replaced = inject_tooltips(masked_text, glossary)
+    final_text = unmask_ignores(replaced, placeholders)
+
+    if '<GlossaryTooltip' in final_text and GLOSSARY_IMPORT not in final_text:
+        final_text = _insert_glossary_import(final_text)
+
+    if '<GlossaryTooltip' in final_text and final_text != content:
+        # Swap only the extension; str.replace(".md", ".mdx") would also
+        # rewrite any ".md" appearing earlier in the path.
+        new_path = os.path.splitext(path)[0] + ".mdx"
+        with open(new_path, 'w', encoding='utf-8') as f:
+            f.write(final_text)
+        if new_path != path:
+            # Never delete the file that was just written.
+            os.remove(path)
+        print(f"✔ Renamed and updated: {path} -> {new_path}")
+    else:
+        print(f"– Skipped (no change): {path}")
+
+def process_directory(base_path, glossary):
+    """Run process_file on every eligible Markdown file below *base_path*.
+
+    Directories named in IGNORE_DIRS are pruned from the walk, and files are
+    eligible when they end in ``.md`` and do not start with an underscore.
+    """
+    for current_dir, subdirs, filenames in os.walk(base_path):
+        # Assign in place so os.walk does not descend into ignored directories.
+        subdirs[:] = [name for name in subdirs if name not in IGNORE_DIRS]
+        for filename in filenames:
+            if filename.startswith("_") or not filename.endswith(".md"):
+                continue
+            process_file(os.path.join(current_dir, filename), glossary)
+
+if __name__ == "__main__":
+    # Script entry point: load the glossary once, then walk the docs tree.
+    process_directory(DOCS_PATH, load_glossary(GLOSSARY_JSON_PATH))
diff --git a/src/components/GlossaryTooltip/glossary.json b/src/components/GlossaryTooltip/glossary.json
@@ -0,0 +1,9 @@
+{
+  "Atomicity": "Atomicity ensures that a transaction (a series of database operations) is treated as a single, indivisible unit. This means that either all operations within the transaction occur, or none do. An example of an atomic transaction is transferring money from one bank account to another. If either step of the transfer fails, the transaction fails, and the money stays in the first account. Atomicity ensures no money is lost or created.",
+  "Cluster": "A collection of nodes (servers) that work together to store and process data.",
+  "CMEK": "Customer-managed encryption keys (CMEK) allow customers to use their key-management service (KMS) key to encrypt the ClickHouse disk data key and protect their data at rest.",
+  "Dictionary": "A dictionary is a mapping of key-value pairs that is useful for various types of reference lists. It is a powerful feature that allows for the efficient use of dictionaries in queries, which is often more efficient than using a `JOIN` with reference tables.",
+  "Parts": "A physical file on a disk that stores a portion of the table's data. This is different from a partition, which is a logical division of a table's data that is created using a partition key.",
+  "Replica": "A copy of the data stored in a ClickHouse database. You can have any number of replicas of the same data for redundancy and reliability. Replicas are used in conjunction with the ReplicatedMergeTree table engine, which enables ClickHouse to keep multiple copies of data in sync across different servers.",
+  "Shard": "A subset of data. ClickHouse always has at least one shard for your data. If you do not split the data across multiple servers, your data will be stored in one shard. Sharding data across multiple servers can be used to divide the load if you exceed the capacity of a single server."
+}
diff --git a/src/css/custom.scss b/src/css/custom.scss
@@ -1423,3 +1423,50 @@ input::-ms-input-placeholder { /* Microsoft Edge */
 .DocSearch-Cancel {
   color: var(--docsearch-text-color) !important;
 }
+
+/* Glossary term trigger: dotted underline plus a help cursor.
+   position: relative makes this element the containing block for an
+   absolutely positioned descendant (presumably .tooltipText; confirm
+   against the component markup).
+   NOTE(review): outline: none removes the default focus outline and no
+   replacement focus style for .tooltip is defined in this hunk. */
+.tooltip {
+  position: relative;
+  cursor: help;
+  border-bottom: 1px dotted #aaa;
+  outline: none; /* remove default outline, but keep focus styling if needed */
+}
+
+/* Tooltip bubble. Hidden by default (visibility: hidden and opacity: 0);
+   the .tooltipText.visible rule switches both to reveal it. */
+.tooltipText {
+  visibility: hidden;
+  /* Size to the content, but wrap once it would exceed 280px. */
+  width: max-content;
+  max-width: 280px;
+  background-color: #333;
+  color: #fff;
+  text-align: left;
+  padding: 8px 12px;
+  border-radius: 4px;
+
+  /* Positioned against the nearest positioned ancestor and centred
+     horizontally on it via left: 50% plus translateX(-50%). */
+  position: absolute;
+  z-index: 100;
+  top: 125%;       /* place tooltip below the term */
+  left: 50%;
+  transform: translateX(-50%);
+
+  /* Opacity is the animated property; the change fades over 0.2s. */
+  opacity: 0;
+  transition: opacity 0.2s ease-in-out;
+
+  /* pre-wrap keeps line breaks in the text while still wrapping long lines. */
+  white-space: pre-wrap;
+  box-shadow: 0 0 8px rgba(0,0,0,0.3);
+  pointer-events: none; /* tooltip itself should not capture pointer */
+}
+
+/* Shown state: applies when the bubble also carries the `visible` class and
+   overrides the hidden defaults set on .tooltipText. What toggles the class
+   is not visible in this stylesheet. */
+.tooltipText.visible {
+  visibility: visible;
+  opacity: 1;
+}
+
+/* Link variant: dotted underline in the surrounding text colour.
+   NOTE(review): this class is kebab-case while .tooltipText is camelCase;
+   confirm which names the component actually uses. */
+.tooltip-link {
+  text-decoration: underline dotted;
+  color: inherit; /* keep text color */
+}
+
+/* Hover and keyboard focus share one style: the dotted underline becomes
+   solid. The default focus outline is removed, so the solid underline is
+   the only focus indicator defined here. */
+.tooltip-link:hover,
+.tooltip-link:focus {
+  text-decoration: underline solid;
+  outline: none;
+}