Skip to content

Commit ea3041d

Browse files
committed
fix refs
1 parent 414eeda commit ea3041d

File tree

3 files changed

+188
-1
lines changed

3 files changed

+188
-1
lines changed

scripts/compare_and_fix_refs.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import json
4+
import re
5+
from pathlib import Path
6+
7+
# Root of the markdown tree that gets scanned, and the JSON snapshot of
# reference refs that this script loads.
SRC_DIR = Path("./src")
REFS_JSON = Path("/tmp/refs.json")

# Lazily captures whatever sits between a {{#ref}} marker and the next
# {{#endref}} marker; [\s\S] lets the capture run across line breaks.
REF_RE = re.compile(r"{{#ref}}\s*([\s\S]*?)\s*{{#endref}}", re.MULTILINE)


def extract_refs(text: str):
    """Collect the body of every ref block in ``text``, in document order.

    Each captured body has its surrounding whitespace stripped.
    """
    found = []
    for hit in REF_RE.finditer(text):
        found.append(hit.group(1).strip())
    return found
16+
17+
def replace_refs_in_text(text: str, new_refs: list):
    """Swap the body of every ref block in ``text`` for the matching entry of ``new_refs``.

    The i-th ``{{#ref}}...{{#endref}}`` block receives ``new_refs[i]`` and is
    re-emitted with the body on its own line between the two markers. If the
    number of blocks differs from ``len(new_refs)``, ``text`` is returned
    unchanged.
    """
    blocks = list(REF_RE.finditer(text))
    if len(blocks) != len(new_refs):
        # Without a one-to-one pairing there is nothing sensible to substitute.
        return text

    # Walk forward once: copy the text between blocks verbatim and emit a
    # freshly formatted block in place of each original one.
    pieces = []
    cursor = 0
    for block, ref_body in zip(blocks, new_refs):
        begin, finish = block.span()
        pieces.append(text[cursor:begin])
        pieces.append(f"{{{{#ref}}}}\n{ref_body}\n{{{{#endref}}}}")
        cursor = finish
    pieces.append(text[cursor:])

    return "".join(pieces)
33+
34+
def _load_master_refs(path):
    """Load the reference refs snapshot; exit with a readable message on failure."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        raise SystemExit(f"Error reading {path}: {e}")
    if not isinstance(data, dict):
        # The rest of the script looks files up by relative path.
        raise SystemExit(f"Error reading {path}: expected a JSON object mapping paths to ref lists")
    return data


def _read_markdown(md_path):
    """Return ``(text, lossless)`` for one markdown file.

    ``lossless`` is False when the file is not valid UTF-8 and undecodable
    bytes were substituted with the replacement character.
    """
    try:
        return md_path.read_text(encoding="utf-8"), True
    except UnicodeDecodeError:
        # Still decode as UTF-8 (not the platform default) so valid text survives.
        return md_path.read_text(encoding="utf-8", errors="replace"), False


def _print_ref_differences(label, current_refs, master_file_refs):
    """Print every differing ref pair for one file; return True if any differed."""
    differences_found = False
    for position, (current_ref, master_ref) in enumerate(zip(current_refs, master_file_refs), start=1):
        if current_ref == master_ref:
            continue
        if not differences_found:
            print(f"🔍 REF DIFFERENCES in {label}:")
            differences_found = True
        print(f" Ref {position}:")
        print(f" Master: {master_ref!r}")
        print(f" Current: {current_ref!r}")
    return differences_found


def _save_unmatched_paths(target, unmatched_files):
    """Write the unmatched paths comma-separated, or a bare newline when there are none."""
    try:
        unmatched_paths_file = Path(target)
        unmatched_paths_file.parent.mkdir(parents=True, exist_ok=True)
        if unmatched_files:
            unmatched_paths_file.write_text(','.join(unmatched_files), encoding='utf-8')
            print(f"📝 Saved {len(unmatched_files)} unmatched file paths to: {unmatched_paths_file}")
        else:
            unmatched_paths_file.write_text('\n', encoding='utf-8')
            print(f"📝 No unmatched files found. Created empty file: {unmatched_paths_file}")
    except (OSError, UnicodeError) as e:
        # Best effort: failing to write the report must not abort the summary.
        if unmatched_files:
            print(f"❌ Error saving unmatched paths to {target}: {e}")
        else:
            print(f"❌ Error creating empty unmatched paths file {target}: {e}")


def main():
    """Compare refs in every markdown file under SRC_DIR with the saved snapshot and fix them.

    For each ``*.md`` file the refs are extracted and compared, in order, with
    the list stored for the same relative path in REFS_JSON. Files that are
    absent from the snapshot (but contain refs), or whose ref count differs,
    are only reported. Files with the same count but different ref bodies are
    rewritten so their refs equal the snapshot's. Every reported file is also
    collected and, when ``--files-unmatched-paths`` is given, written to that
    path.

    Raises:
        SystemExit: if SRC_DIR is not a directory, or REFS_JSON is missing,
            unreadable, or not a JSON object.
    """
    parser = argparse.ArgumentParser(description="Compare and fix refs between current branch and master branch")
    parser.add_argument("--files-unmatched-paths", type=str,
                        help="Path to file where unmatched file paths will be saved (comma-separated on first line)")
    args = parser.parse_args()

    if not SRC_DIR.is_dir():
        raise SystemExit(f"Not a directory: {SRC_DIR}")

    if not REFS_JSON.exists():
        raise SystemExit(f"Reference file not found: {REFS_JSON}")

    master_refs = _load_master_refs(REFS_JSON)
    print(f"Loaded reference data for {len(master_refs)} files from {REFS_JSON}")

    files_processed = 0
    files_modified = 0
    files_with_differences = 0
    unmatched_files = []  # Paths (with the src/ prefix) of every file reported below

    for md_path in sorted(SRC_DIR.rglob("*.md")):
        rel = md_path.relative_to(SRC_DIR).as_posix()
        rel_with_src = f"{SRC_DIR.name}/{rel}"  # Include src/ prefix for output
        files_processed += 1

        content, lossless = _read_markdown(md_path)
        current_refs = extract_refs(content)

        # Files unknown to the snapshot are only worth reporting if they hold refs.
        if rel not in master_refs:
            if current_refs:
                print(f"⚠️ NEW FILE with refs: {rel_with_src} (has {len(current_refs)} refs)")
                files_with_differences += 1
                unmatched_files.append(rel_with_src)
            continue

        master_file_refs = master_refs[rel]

        # A different number of refs cannot be paired up, so report and move on.
        if len(current_refs) != len(master_file_refs):
            print(f"📊 REF COUNT MISMATCH: {rel_with_src} -- Master: {len(master_file_refs)} refs, Current: {len(current_refs)} refs")
            files_with_differences += 1
            unmatched_files.append(rel_with_src)
            continue

        # Counts are equal here, so an empty current list means neither side has refs.
        if not current_refs:
            continue

        if not _print_ref_differences(rel_with_src, current_refs, master_file_refs):
            continue

        files_with_differences += 1
        unmatched_files.append(rel_with_src)

        if not lossless:
            # Writing back lossily decoded text would permanently replace the
            # file's undecodable bytes, so leave the file alone.
            print(f" ❌ Skipped fixing refs in {rel_with_src}: file is not valid UTF-8")
            continue

        # Replace current refs with the snapshot's refs. Any failure is reported
        # and the run continues with the next file.
        try:
            new_content = replace_refs_in_text(content, master_file_refs)
            if new_content != content:
                md_path.write_text(new_content, encoding="utf-8")
                files_modified += 1
                print(f" ✅ Fixed refs in {rel_with_src}")
            else:
                print(f" ❌ Failed to replace refs in {rel_with_src}")
        except Exception as e:
            print(f" ❌ Error fixing refs in {rel_with_src}: {e}")

    # Save unmatched files to the specified path if requested
    if args.files_unmatched_paths:
        _save_unmatched_paths(args.files_unmatched_paths, unmatched_files)

    print("\n📈 SUMMARY:")
    print(f" Files processed: {files_processed}")
    print(f" Files with differences: {files_with_differences}")
    print(f" Files modified: {files_modified}")
    if unmatched_files:
        print(f" Unmatched files: {len(unmatched_files)}")
147+
148+
if __name__ == "__main__":
149+
main()

scripts/get_and_save_refs.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env python3
2+
import json
3+
import re
4+
from pathlib import Path
5+
6+
# Root of the markdown tree that gets scanned, and the JSON file this script
# writes the collected refs to.
SRC_DIR = Path("./src")
REFS_JSON = Path("/tmp/refs.json")

# Lazily captures whatever sits between a {{#ref}} marker and the next
# {{#endref}} marker; [\s\S] lets the capture run across line breaks.
REF_RE = re.compile(r"{{#ref}}\s*([\s\S]*?)\s*{{#endref}}", re.MULTILINE)


def extract_refs(text: str):
    """Collect the body of every ref block in ``text``, in document order.

    Each captured body has its surrounding whitespace stripped.
    """
    bodies = (hit.group(1) for hit in REF_RE.finditer(text))
    return [body.strip() for body in bodies]
15+
16+
def main():
    """Collect the refs of every markdown file under SRC_DIR and save them to REFS_JSON.

    The output is a JSON object mapping each file's path relative to SRC_DIR
    (POSIX separators) to the list of its refs in order of appearance. Files
    without refs are stored with an empty list.

    Raises:
        SystemExit: if SRC_DIR is not a directory.
    """
    if not SRC_DIR.is_dir():
        raise SystemExit(f"Not a directory: {SRC_DIR}")

    refs_per_path = {}  # { "relative/path.md": [ref1, ref2, ...] }

    for md_path in sorted(SRC_DIR.rglob("*.md")):
        rel = md_path.relative_to(SRC_DIR).as_posix()
        try:
            content = md_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # Keep decoding as UTF-8 rather than the platform default, so only
            # the undecodable bytes are substituted, and say so instead of
            # silently storing altered refs.
            content = md_path.read_text(encoding="utf-8", errors="replace")
            print(f"⚠️ {rel} is not valid UTF-8; undecodable bytes were replaced")

        refs_per_path[rel] = extract_refs(content)  # keeps order of appearance

    REFS_JSON.write_text(json.dumps(refs_per_path, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    print(f"Wrote {REFS_JSON} with {len(refs_per_path)} files.")
36+
37+
if __name__ == "__main__":
38+
main()

scripts/translator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ def translate_directory(language, source_path, dest_path, model, num_threads, cl
425425
translate_files = None # Need to initialize it here to avoid error
426426
if args.file_paths:
427427
# Translate only the indicated file
428-
translate_files = [f for f in args.file_paths.split(' , ') if f]
428+
translate_files = [f.strip() for f in args.file_paths.split(',') if f]
429429
for file_path in translate_files:
430430
#with tqdm(total=len(all_markdown_files), desc="Translating Files") as pbar:
431431
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:

0 commit comments

Comments
 (0)