Skip to content

Commit 50d3910

Browse files
committed
Content Sync: Testing the action
1 parent 0465365 commit 50d3910

File tree

3 files changed

+66
-50
lines changed

3 files changed

+66
-50
lines changed

.github/workflows/sync-to-astro.yml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -57,43 +57,43 @@ jobs:
5757
5858
# Step 5: Run the MD to MDX conversion script.
5959
- name: Run sync script
60-
run: cd sync/source && python scripts/md2mdx.py
60+
run: python sync/source/scripts/md2mdx.py --source sync/source --target sync/target
6161

6262
# Step 6: Create or update PR with changes.
6363
# - name: Create Pull Request
6464
# env:
6565
# GH_TOKEN: ${{ secrets.APIFY_SERVICE_ACCOUNT_GITHUB_TOKEN }}
6666
# run: |
6767
# cd sync/target
68-
68+
6969
# # Debug: Show current directory and its contents
7070
# echo "Current directory: $(pwd)"
7171
# echo "Directory contents:"
7272
# ls -la
73-
73+
7474
# # Debug: Show git status in detail
7575
# echo "Git status:"
7676
# git status
77-
77+
7878
# # Debug: Show any differences
7979
# echo "Git diff:"
8080
# git diff
81-
81+
8282
# git config user.name "github-actions[bot]"
8383
# git config user.email "github-actions[bot]@users.noreply.github.com"
8484

8585
# # Create new branch (with error handling).
8686
# git checkout -b ${{ env.TARGET_BRANCH }} || git checkout ${{ env.TARGET_BRANCH }}
87-
87+
8888
# # Debug: Show branch status
8989
# echo "Current branch:"
9090
# git branch --show-current
91-
91+
9292
# # Only create PR if there are changes.
9393
# if [[ -n "$(git status --porcelain)" ]]; then
9494
# echo "Changes detected:"
9595
# git status --porcelain
96-
96+
9797
# git add .
9898
# git commit -m "sync: Update MDX content from whitepaper"
9999

scripts/md2mdx.py

Lines changed: 57 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -35,17 +35,33 @@
3535
import frontmatter
3636
import re
3737
import glob
38+
import argparse
39+
40+
41+
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default ``None`` makes argparse read
            ``sys.argv[1:]``, which is what the original no-argument call did.

    Returns:
        argparse.Namespace with ``source`` and ``target`` attributes. Each is
        a string, or ``None`` when the option was not given.

    Raises:
        SystemExit: Raised by argparse on invalid usage, including when only
            one of ``--source`` / ``--target`` is supplied.
    """

    parser = argparse.ArgumentParser(description='Transform Markdown files to MDX format.')
    parser.add_argument('--source', type=str, help='Source directory containing markdown files')
    parser.add_argument('--target', type=str, help='Target directory for MDX files')

    args = parser.parse_args(argv)

    # The module-level caller only applies the overrides when BOTH options are
    # set, so a lone option would be dropped without any feedback.
    if bool(args.source) != bool(args.target):
        parser.error('--source and --target must be provided together')

    return args
49+
3850

3951
# Get the project root - need to handle both direct run and test-sync run.
4052
SCRIPT_PATH = Path(__file__).resolve()
41-
CURRENT_DIR = Path.cwd()
42-
43-
PROJECT_ROOT = SCRIPT_PATH.parent.parent
44-
SOURCE_ROOT = PROJECT_ROOT / 'sync/source'
45-
TARGET_ROOT = PROJECT_ROOT / 'sync/target'
53+
args = parse_args()
4654

4755
print(f'Script location: {__file__}')
48-
print(f'Project root: {PROJECT_ROOT}')
56+
57+
if args.source and args.target:
58+
SOURCE_ROOT = Path(args.source).resolve()
59+
TARGET_ROOT = Path(args.target).resolve()
60+
else:
61+
PROJECT_ROOT = SCRIPT_PATH.parent.parent
62+
SOURCE_ROOT = PROJECT_ROOT / 'sync/source'
63+
TARGET_ROOT = PROJECT_ROOT / 'sync/target'
64+
4965
print(f'Source root: {SOURCE_ROOT}')
5066
print(f'Target root: {TARGET_ROOT}')
5167

@@ -94,7 +110,7 @@
94110

95111
def should_process_file(path: Path) -> bool:
96112
"""Determine if a file should be processed based on ignore rules."""
97-
113+
98114
# Case-insensitive filename check.
99115
if path.name.lower() in IGNORED_FILES:
100116
print(f'\n󰋼 Skipping ignored file: {path.name}')
@@ -138,7 +154,7 @@ def replace_image(match):
138154

139155
def add_github_header(content: str, is_readme: bool) -> str:
140156
"""Add GitHub header component after the first heading, but only for README.md."""
141-
157+
142158
if not is_readme:
143159
return content
144160

@@ -309,61 +325,61 @@ def replace_comment(match):
309325

310326
def transform_internal_links(content: str) -> str:
311327
"""Transform internal markdown links to the new MDX format."""
312-
328+
313329
print('\n󰋼 Transforming internal links...')
314330

315331
def format_link_text(text):
    """Convert technical names to readable titles.

    A trailing ``.json`` or ``.md`` extension is removed first. The bare name
    ``README`` becomes ``Documentation``. An all-upper-case name is split on
    underscores and each word is capitalised (``DATASET_SCHEMA`` ->
    ``Dataset Schema``). Any other text is returned with only the extension
    removed.
    """

    # Remove file extensions.
    text = re.sub(r'\.(json|md)$', '', text)

    # Handle special cases.
    if text == 'README':
        return 'Documentation'

    # Convert UPPER_CASE to Title Case.
    if text.isupper():
        # Skip empty segments so leading, trailing or doubled underscores do
        # not leave stray spaces in the title.
        words = [word for word in text.split('_') if word]
        return ' '.join(word.capitalize() for word in words)

    return text
331347

332348
def replace_link(match):
    """Rewrite one matched markdown link into its MDX form.

    Args:
        match: Regex match whose three groups are the link text, the optional
            relative path and the optional ``#anchor``.

    Returns:
        The rewritten link as ``[text](new_path)``.
    """
    text, path, anchor = match.groups()

    # Handle different link types.
    if path:
        # Remove the .md extension. The lookahead keeps longer extensions such
        # as '.mdx' intact; a plain substring replace turned 'notes.mdx' into
        # 'notesx'.
        path = re.sub(r'\.md(?![A-Za-z0-9_])', '', path)

        if path == '../README':
            # Links to README become root links.
            new_path = '/'
        else:
            # lstrip takes a set of characters, so this removes every leading
            # '.' and '/', covering './', '../' and '/' prefixes alike.
            path = path.lstrip('./')

            # Convert to kebab case.
            new_path = '/' + path.lower().replace('_', '-')
    else:
        # Anchor-only link: there is no path component to rewrite.
        new_path = ''

    # Add anchor if present.
    if anchor:
        new_path = f"{new_path}{anchor}"

    # Format the link text if it's a technical name.
    if text.endswith(('.md', '.json')) or text.isupper() or '.json' in text:
        text = format_link_text(text)

    print(f' ⭮ {text}{new_path}')
    return f'[{text}]({new_path})'
362378

363379
# First pass: handle standard markdown links.
364380
pattern = r'\[([^\]]+)\]\(((?!http)[^)#\s]+)?([#][^)\s]+)?\)'
365381
content = re.sub(pattern, replace_link, content)
366-
382+
367383
# Second pass: handle already transformed links but with technical names.
368384
pattern = r'\[([A-Z_]+(?:\.(?:json|md))?)\](/[a-z-]+(?:[#][^)\s]+)?)\)'
369385
content = re.sub(pattern, lambda m: f'[{format_link_text(m.group(1))}]{m.group(2)}', content)
@@ -373,7 +389,7 @@ def replace_link(match):
373389

374390
def remove_img_tags(content: str) -> str:
375391
"""Remove HTML img tags from the content."""
376-
392+
377393
print('\n󰋼 Removing img tags...')
378394

379395
def replace_img(match):
@@ -390,20 +406,20 @@ def replace_img(match):
390406

391407
def transform_inline_references(content: str) -> str:
392408
"""Transform inline file references and URLs to proper format."""
393-
409+
394410
print('\n󰋼 Transforming inline references...')
395411

396412
def replace_reference(match):
    """Turn one matched inline file reference into a site-relative path.

    Args:
        match: Regex match whose first group is the referenced file path.

    Returns:
        The lower-cased, kebab-cased path with a leading slash and without
        the ``.md`` extension.
    """
    # Remove the .md extension. The lookahead leaves longer extensions such
    # as '.mdx' alone; a plain substring replace would corrupt them.
    path = re.sub(r'\.md(?![A-Za-z0-9_])', '', match.group(1))

    # Convert to kebab case and add leading slash. lstrip takes a set of
    # characters, so every leading '.' and '/' is removed.
    new_path = '/' + path.lstrip('./').lower().replace('_', '-')

    print(f' ⭮ {path}{new_path}')

    return new_path
408424

409425
# Transform file references like ./DATASET_SCHEMA.md to /dataset-schema.
@@ -426,10 +442,10 @@ def replace_reference(match):
426442

427443
def transform_markdown_to_mdx(content: str, source_file: Path) -> str:
428444
"""Main transformation pipeline to convert markdown to MDX format."""
429-
445+
430446
print('\n󰋼 Parsing frontmatter...')
431447
post = frontmatter.loads(content)
432-
448+
433449
is_readme = source_file.name.lower() == 'readme.md'
434450

435451
# Apply transformations in sequence.
@@ -451,14 +467,14 @@ def transform_markdown_to_mdx(content: str, source_file: Path) -> str:
451467

452468
def get_target_path(source_path: Path) -> Path:
453469
"""Convert source path to target path using the required transformations."""
454-
470+
455471
# Get relative path from source root.
456472
rel_path = source_path.relative_to(SOURCE_ROOT)
457-
473+
458474
# Transform filename.
459475
stem = rel_path.stem.lower().replace('_', '-')
460476
new_name = f"{stem}.mdx"
461-
477+
462478
# Construct target path.
463479
if source_path.name == 'README.md':
464480
# Special case for README.md -> index.mdx.
@@ -470,47 +486,47 @@ def get_target_path(source_path: Path) -> Path:
470486

471487
def process_files():
    """Main function to process all markdown files.

    Collects ``*.md`` files from SOURCE_ROOT and SOURCE_ROOT/pages, drops the
    ones rejected by should_process_file, transforms each one and writes the
    result to the path given by get_target_path. Afterwards it runs
    ``npm run format-sync``. Any exception is reported and ends the process
    with exit status 1.
    """

    try:
        # Find all markdown files to process. Each group is sorted because
        # glob returns entries in arbitrary order.
        candidates = [
            *sorted(glob.glob(str(SOURCE_ROOT / '*.md'))),  # root md files
            *sorted(glob.glob(str(SOURCE_ROOT / 'pages/*.md'))),  # files in pages directory
        ]
        # Filter out ignored files.
        source_files = [path for path in map(Path, candidates) if should_process_file(path)]

        print(f'\n󰋼 Found {len(source_files)} markdown files to process')

        for source_file in source_files:
            target_file = get_target_path(source_file)
            print(f'\n󰋼 Processing: {source_file.name}{target_file.name}')

            # Read source content.
            print(f' Reading source file: {source_file}')
            with open(source_file, 'r', encoding='utf-8') as f:
                content = f.read()
            print(f' Source file size: {len(content)} bytes')

            # Transform content.
            print('\n󰋼 Transforming content...')
            transformed_content = transform_markdown_to_mdx(content, source_file)
            print(f' ⭮ {len(transformed_content)} bytes')

            # Write target file.
            print(f'\n󰋼 Writing target file: {target_file}')
            os.makedirs(target_file.parent, exist_ok=True)
            with open(target_file, 'w', encoding='utf-8') as f:
                f.write(transformed_content)
            print(f' ⭮ {source_file.name}{target_file.name}')

        print('\n󰋼 Formatting MDX files...')
        status = os.system('npm run format-sync')
        if status != 0:
            # Formatting stays best-effort, but a failure is now reported
            # instead of being discarded.
            print(f'\nWarning: formatter exited with status {status}; MDX files may be unformatted')

        print('\n Done')

    except Exception as error:
        # Top-level boundary: report the failure and signal it to the caller
        # through the exit status.
        print('\n❌ Error processing files:', str(error))
        sys.exit(1)

scripts/test-sync.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ echo -e "\n Python dependencies installed"
5151

5252
echo -e "\n\n"
5353
echo "Current path: $(pwd)"
54-
python3 scripts/md2mdx.py
54+
# Quote the expansions so a WORK_DIR containing spaces or glob characters is passed as one argument.
python3 scripts/md2mdx.py --source "$WORK_DIR/source" --target "$WORK_DIR/target"
5555
echo -e "\n MD to MDX conversion completed"
5656

5757
echo -e "\n\n"

0 commit comments

Comments
 (0)