Skip to content

Commit 14fa6cb

Browse files
committed
initial changes
1 parent f373522 commit 14fa6cb

File tree

3 files changed

+146
-0
lines changed

3 files changed

+146
-0
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import os
2+
import re
3+
import json
4+
5+
# Glossary file read at start-up by the script entry point.
GLOSSARY_JSON_PATH = "/home/dtran/clickhouse-docs/src/components/GlossaryTooltip/glossary.json"
# Root of the documentation tree that gets walked.
DOCS_PATH = "/home/dtran/clickhouse-docs/docs"

# Directory names that are pruned from the walk (matched against the bare
# directory name at any depth, not against a full path).
IGNORE_DIRS = {
    "changelog", "changelogs", "i18n", "scripts", "static", "styles",
    "contribute", "about-us", "_placeholders"
}

# Import statement added to a file once it contains a <GlossaryTooltip> element.
GLOSSARY_IMPORT = "import GlossaryTooltip from '@site/src/components/GlossaryTooltip/GlossaryTooltip.jsx';"
14+
15+
def load_glossary(path):
    """Parse the JSON file at *path* and return the decoded object.

    The file is read as UTF-8. The rest of the script treats the result as a
    mapping of glossary term -> definition text.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
18+
19+
def mask_ignores(text):
    """Replace regions that must not receive tooltips with opaque placeholders.

    Code blocks, inline code, front matter, import lines, headers, selected
    HTML blocks, blockquotes, images, links and HTML comments are each swapped
    for a key of the form ``__MASKED_<KIND>_<n>__``.

    Args:
        text: Markdown source.

    Returns:
        A ``(masked_text, placeholders)`` tuple. ``placeholders`` maps every
        key to the exact original text it stands for, so replacing each key
        with its value (in any order) restores ``text``.
    """
    placeholders = {}
    # Applied in this order; a later pattern may swallow placeholders that an
    # earlier one produced (e.g. a header line that contains inline code).
    patterns = {
        'codeblocks': r'```[\s\S]*?```',
        'inline_code': r'`[^`\n]+`',
        # \A: front matter only exists at the very top of the file. A bare
        # ^--- would also pair up horizontal rules and mask the prose between.
        'frontmatter': r'\A---[\s\S]+?---',
        'imports': r'^import .*?;$',
        'headers': r'^(#+ .*)$',
        'html_blocks': r'<(div|details|summary)[\s\S]*?<\/\1>',
        'blockquotes': r'^\s*>.*$',
        # Images go before links: the link pattern also matches the
        # "[alt](url)" tail of an image and would leave a stray "!" behind.
        'images': r'!\[[^\]]*\]\([^)]+\)',
        'links': r'\[([^\]]+)\]\([^)]+\)',
        'comments': r'<!--[\s\S]*?-->',
    }
    placeholder_re = re.compile(r'__MASKED_[A-Z_]+_\d+__')

    def expand(fragment):
        # Resolve placeholders captured inside this fragment so the stored
        # value is the real original text, not text that needs a second
        # restore pass. Earlier values are already fully expanded.
        return placeholder_re.sub(
            lambda m: placeholders.get(m.group(0), m.group(0)), fragment
        )

    for name, pattern in patterns.items():
        regex = re.compile(pattern, re.MULTILINE)
        pieces = []
        cursor = 0
        # Substitute by position instead of str.replace() on the matched
        # text, so every match gets exactly one key and unrelated substrings
        # that happen to be equal are left alone.
        for i, match in enumerate(regex.finditer(text)):
            key = f"__MASKED_{name.upper()}_{i}__"
            placeholders[key] = expand(match.group(0))
            pieces.append(text[cursor:match.start()])
            pieces.append(key)
            cursor = match.end()
        pieces.append(text[cursor:])
        text = "".join(pieces)

    return text, placeholders
43+
44+
def unmask_ignores(text, placeholders):
    """Substitute every placeholder key in *text* with its stored value.

    A stored value may itself contain other placeholder keys (for example a
    masked header whose text includes a masked inline-code span). One pass in
    dictionary order would leave such inner keys in the output, so passes are
    repeated until the text stops changing.

    Args:
        text: Text containing placeholder keys.
        placeholders: Mapping of placeholder key -> replacement text.

    Returns:
        The text with all resolvable placeholders replaced.
    """
    # Each pass resolves at least one nesting level, and nesting can never be
    # deeper than the number of placeholders, so this bound always suffices.
    for _ in range(len(placeholders)):
        restored = text
        for key, value in placeholders.items():
            restored = restored.replace(key, value)
        if restored == text:
            break
        text = restored
    return text
48+
49+
def inject_tooltips(text, glossary):
    """Wrap every whole-word glossary term in *text* with a GlossaryTooltip tag.

    Args:
        text: Text to scan (callers are expected to have masked regions that
            must stay untouched).
        glossary: Mapping of term -> definition. Matching is case-sensitive.
            Terms with an empty definition are left as plain text.

    Returns:
        The text with each matched term replaced by
        ``<GlossaryTooltip term="..." definition="...">term</GlossaryTooltip>``.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # Longest first: regex alternation takes the first branch that matches,
    # so "Shard" listed before "Shard key" would always shadow the longer term.
    terms = sorted((term for term in glossary if term), key=len, reverse=True)
    if not terms:
        # An empty alternation would match the empty string at every boundary.
        return text

    def attr(value):
        # Attribute values are wrapped in double quotes; escape the characters
        # that would terminate the attribute or be read as markup/entities.
        return html.escape(str(value), quote=False).replace('"', '&quot;')

    def replacement(match):
        word = match.group(0)
        definition = glossary.get(word)
        if definition:
            return (
                f'<GlossaryTooltip term="{attr(word)}" '
                f'definition="{attr(definition)}">{word}</GlossaryTooltip>'
            )
        return word

    # Lookarounds behave like \b for ordinary words but also work for terms
    # that begin or end with a non-word character.
    pattern = r'(?<!\w)(?:' + '|'.join(re.escape(t) for t in terms) + r')(?!\w)'
    return re.sub(pattern, replacement, text)
59+
60+
def process_file(path, glossary):
    """Inject glossary tooltips into one Markdown file and convert it to MDX.

    The file is read, protected regions are masked, tooltips are injected and
    the masked regions are restored. If at least one tooltip is present and
    the content changed, the result is written next to the original with an
    ``.mdx`` extension and the original file is removed. Otherwise the file
    is left untouched. A one-line status is printed either way.

    Args:
        path: Path of the Markdown file to process.
        glossary: Mapping of term -> definition.
    """
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    masked_text, placeholders = mask_ignores(content)
    replaced = inject_tooltips(masked_text, glossary)
    final_text = unmask_ignores(replaced, placeholders)

    has_tooltip = '<GlossaryTooltip' in final_text
    if has_tooltip and GLOSSARY_IMPORT not in final_text:
        final_text = _insert_glossary_import(final_text, GLOSSARY_IMPORT)

    if not has_tooltip or final_text == content:
        print(f"– Skipped (no change): {path}")
        return

    # Swap only the extension; str.replace(".md", ".mdx") would also rewrite
    # any ".md" that appears earlier in the path.
    new_path = os.path.splitext(path)[0] + ".mdx"
    if new_path != path and os.path.exists(new_path):
        # Never clobber a hand-written .mdx sibling.
        print(f"– Skipped (target exists): {path} -> {new_path}")
        return

    with open(new_path, 'w', encoding='utf-8') as f:
        f.write(final_text)
    if new_path != path:
        # When the input already ends in .mdx the file just written IS the
        # original; removing it would delete the result.
        os.remove(path)
    print(f"✔ Renamed and updated: {path} -> {new_path}")


def _insert_glossary_import(text, import_line):
    """Return *text* with *import_line* placed after any leading front matter."""
    # Front matter is only recognised at the very start of a file, so the
    # import must go below it rather than above it.
    front_matter = re.match(r'---[ \t]*\r?\n[\s\S]*?\r?\n---[ \t]*(?:\r?\n|$)', text)
    if front_matter:
        head = front_matter.end()
        return text[:head] + "\n" + import_line + "\n\n" + text[head:]
    # Blank line keeps the import separate from the Markdown that follows.
    return import_line + "\n\n" + text
79+
80+
def process_directory(base_path, glossary):
    """Run ``process_file`` on every eligible Markdown file under *base_path*.

    Directories named in ``IGNORE_DIRS`` are pruned at any depth. A file is
    eligible when its name ends in ``.md`` and does not start with ``_``.

    Args:
        base_path: Root directory to walk.
        glossary: Mapping of term -> definition, passed through unchanged.
    """
    for root, dirs, files in os.walk(base_path):
        # In-place assignment is what prunes os.walk's descent; sorting makes
        # the traversal (and the printed log) reproducible between runs.
        dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)
        for filename in sorted(files):
            if filename.endswith(".md") and not filename.startswith("_"):
                process_file(os.path.join(root, filename), glossary)
87+
88+
if __name__ == "__main__":
    # Usage: script.py [GLOSSARY_JSON [DOCS_DIR]]
    # Both arguments are optional; the module-level defaults are used when
    # they are omitted, so running with no arguments behaves as before.
    import sys

    glossary_path = sys.argv[1] if len(sys.argv) > 1 else GLOSSARY_JSON_PATH
    docs_path = sys.argv[2] if len(sys.argv) > 2 else DOCS_PATH

    glossary = load_glossary(glossary_path)
    process_directory(docs_path, glossary)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"Atomicity": "Atomicity ensures that a transaction (a series of database operations) is treated as a single, indivisible unit. This means that either all operations within the transaction occur, or none do. An example of an atomic transaction is transferring money from one bank account to another. If either step of the transfer fails, the transaction fails, and the money stays in the first account. Atomicity ensures no money is lost or created.",
3+
"Cluster": "A collection of nodes (servers) that work together to store and process data.",
4+
"CMEK": "Customer-managed encryption keys (CMEK) allow customers to use their key-management service (KMS) key to encrypt the ClickHouse disk data key and protect their data at rest.",
5+
"Dictionary": "A dictionary is a mapping of key-value pairs that is useful for various types of reference lists. It is a powerful feature that allows for the efficient use of dictionaries in queries, which is often more efficient than using a `JOIN` with reference tables.",
6+
"Parts": "A physical file on a disk that stores a portion of the table's data. This is different from a partition, which is a logical division of a table's data that is created using a partition key.",
7+
"Replica": "A copy of the data stored in a ClickHouse database. You can have any number of replicas of the same data for redundancy and reliability. Replicas are used in conjunction with the ReplicatedMergeTree table engine, which enables ClickHouse to keep multiple copies of data in sync across different servers.",
8+
"Shard": "A subset of data. ClickHouse always has at least one shard for your data. If you do not split the data across multiple servers, your data will be stored in one shard. Sharding data across multiple servers can be used to divide the load if you exceed the capacity of a single server."
9+
}

src/css/custom.scss

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,3 +1423,50 @@ input::-ms-input-placeholder { /* Microsoft Edge */
14231423
.DocSearch-Cancel {
14241424
color: var(--docsearch-text-color) !important;
14251425
}
1426+
1427+
/* Glossary term trigger: dotted underline plus a "help" cursor.
   position: relative makes it the containing block for any absolutely
   positioned descendant (presumably the .tooltipText bubble; confirm
   against the component markup). */
.tooltip {
  position: relative;
  cursor: help;
  border-bottom: 1px dotted #aaa;
  outline: none; /* default outline is replaced by the :focus-visible rule below */
}

/* Keep a visible indicator for keyboard focus; removing the outline without
   a replacement leaves keyboard users with no cue. */
.tooltip:focus-visible {
  outline: 2px solid currentColor;
  outline-offset: 2px;
}
1433+
1434+
/* Tooltip bubble: hidden by default, revealed by adding the .visible class. */
.tooltipText {
  /* Box */
  width: max-content;
  max-width: 280px;
  padding: 8px 12px;
  border-radius: 4px;
  background-color: #333;
  color: #fff;
  text-align: left;
  white-space: pre-wrap;
  box-shadow: 0 0 8px rgba(0,0,0,0.3);

  /* Placement: centred horizontally, just below the positioned ancestor */
  position: absolute;
  z-index: 100;
  top: 125%; /* place tooltip below the term */
  left: 50%;
  transform: translateX(-50%);

  /* Hidden state. visibility is part of the transition so the bubble stays
     rendered while opacity fades out; otherwise it disappears instantly on
     hide and the fade-out never shows. */
  visibility: hidden;
  opacity: 0;
  transition: opacity 0.2s ease-in-out, visibility 0.2s ease-in-out;

  pointer-events: none; /* tooltip itself should not capture pointer */
}
1457+
1458+
/* Shown state of the tooltip bubble: overrides the hidden defaults set on
   .tooltipText. */
.tooltipText.visible {
  opacity: 1;
  visibility: visible;
}
1462+
1463+
/* Link variant of a glossary term: dotted underline, and the colour is
   taken from the surrounding text instead of the link colour. */
.tooltip-link {
  color: inherit; /* keep text color */
  text-decoration: underline dotted;
}
1467+
1468+
/* Hover and focus switch the dotted underline to a solid one. */
.tooltip-link:hover,
.tooltip-link:focus {
  text-decoration: underline solid;
  outline: none; /* default outline is replaced by the :focus-visible rule below */
}

/* The underline change alone is a weak focus cue; keep an explicit outline
   for keyboard focus. */
.tooltip-link:focus-visible {
  outline: 2px solid currentColor;
  outline-offset: 2px;
}

0 commit comments

Comments
 (0)