Skip to content

Commit f28eae1

Browse files
committed
final fix for blog & docs url
1 parent 87a9794 commit f28eae1

File tree

1 file changed

+33
-27
lines changed

1 file changed

+33
-27
lines changed

scripts/typesense_indexer.py

Lines changed: 33 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -301,64 +301,70 @@ def normalize_slug(s: str) -> str:
301301

302302
try:
303303
with open(file_path, 'r', encoding='utf-8') as f:
304-
content = f.read()
304+
post = frontmatter.load(f)
305305

306-
# Parse frontmatter
307-
post = frontmatter.loads(content)
308306
metadata = post.metadata
307+
rel_path = file_path.relative_to(content_root)
309308

310309
if is_blog:
311-
# Blog processing
312-
url = '/blog/' + normalize_slug(file_path.stem)
310+
# Blog logic
311+
slug = normalize_slug(file_path.stem)
312+
url_path = '/blog/' + slug
313313
section = 'Blog'
314314
subsection = metadata.get('author', None)
315-
rel_path = file_path.relative_to(content_root)
316-
is_component = False
317315

318-
# Extract title for blog
319316
title = metadata.get('title', '')
320317
if not title:
321318
headings = self.processor.extract_headings(post.content)
322-
title = headings[0] if headings else normalize_slug(file_path.stem).replace('-', ' ').title()
319+
title = headings[0] if headings else slug.replace('-', ' ').title()
323320
else:
324-
# Docs processing (using new logic)
325-
url = self.get_url_from_path(file_path, content_root)
326-
section, subsection = self.get_section_info(file_path, content_root)
327-
rel_path = file_path.relative_to(content_root)
328-
is_component = 'library' in rel_path.parts
329-
330-
# Extract title for docs (with -ll handling)
331-
stem = file_path.stem
332-
if stem.endswith('-ll'):
333-
base = stem[:-3].replace('-', ' ').title()
321+
# Docs logic: build the URL from the normalized directory parts of rel_path
322+
path_parts = [normalize_slug(p) for p in rel_path.parts[:-1]] # Remove filename and normalize
323+
url_path = '/' + '/'.join(['docs'] + path_parts)
324+
325+
stem = normalize_slug(file_path.stem)
326+
if file_path.name != 'index.md':
327+
if stem.endswith('-ll'):
328+
stem = stem[:-3] # Remove -ll
329+
url_path += '/' + stem + '/low'
330+
else:
331+
url_path += '/' + stem
332+
333+
if url_path != '/' and url_path.endswith('/'):
334+
url_path = url_path.rstrip('/')
335+
336+
section = path_parts[0] if path_parts else 'docs'
337+
subsection = path_parts[1] if len(path_parts) > 1 else None
338+
339+
# Title logic
340+
if file_path.stem.endswith('-ll'):
341+
base = stem.replace('-', ' ').title()
334342
default_title = f"{base} Low Level"
335343
else:
336344
default_title = stem.replace('-', ' ').title()
337-
338345
title = metadata.get('title', default_title)
339346

340-
# Process content (common for both)
347+
# Common processing
341348
clean_content = self.processor.clean_content(post.content)
342349
headings = self.processor.extract_headings(post.content)
343350
components = list(self.processor.extract_components(post.content))
344351
code_examples = self.processor.extract_code_examples(post.content)
345352

346-
# Create document
353+
print(url_path)
354+
347355
doc = {
348356
'id': str(rel_path),
349357
'title': title,
350358
'content': clean_content,
351359
'headings': headings,
352360
'path': str(rel_path),
353-
'url': url,
361+
'url': url_path,
354362
'section': section,
355-
'is_component': is_component,
363+
'is_component': 'library' in rel_path.parts,
356364
}
357365

358-
# Add breadcrumbs
359366
doc['breadcrumb'] = self.create_breadcrumb(doc)
360367

361-
# Add optional fields
362368
if code_examples:
363369
doc['code_examples'] = code_examples
364370
if components:
@@ -384,7 +390,7 @@ def index_documents(self, docs_root: Path, batch_size: int = 100, is_blog: bool
384390
processed = 0
385391

386392
for file_path in markdown_files:
387-
doc = self.process_file(file_path, docs_root)
393+
doc = self.process_file(file_path, docs_root, is_blog)
388394
if doc:
389395
documents.append(doc)
390396
processed += 1

0 commit comments

Comments
 (0)