 import pathlib
 import datetime
 import re
+import sys
 import yaml
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Dict, List, Any, Optional
+
 import typesense
+import reflex as rx
+from reflex.utils.imports import ImportVar
+
+# Add the project root to the sys.path
+project_root = pathlib.Path(__file__).resolve().parent.parent
+if str(project_root) not in sys.path:
+    sys.path.insert(0, str(project_root))
+
+from pcweb.pages.docs.source import Source, generate_docs
+from pcweb.pages.docs.apiref import modules
+from pcweb.pages.docs.env_vars import env_vars_page, EnvVarDocs
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
 ACRONYMS = {"AI", "API", "HTTP", "HTTPS", "SQL", "JSON", "XML", "CPU", "GPU", "OAuth", "CLI", "URL", "DNS", "IP", "UI", "MCP"}
 
+def _render_component_to_text(c: Any) -> str:
+    """Render a Reflex component to a text string."""
+    if not isinstance(c, rx.Component):
+        if isinstance(c, rx.Var):
+            return str(c._var_value)
+        if isinstance(c, (str, int, float, bool)):
+            return str(c)
+        return ""
+
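+    # Recursively collect text from child components and join the non-empty pieces.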
+    texts = [_render_component_to_text(child) for child in c.children]
+    return " ".join(filter(None, texts))
+
+def _extract_headings_from_component(c: Any) -> List[str]:
+    """Extract headings from a component tree."""
+    headings = []
+    if not isinstance(c, rx.Component):
+        return headings
+
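+    # Treat any component whose tag is "h" followed by a digit (h1-h6) as a heading.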
+    if c.tag and c.tag.startswith('h') and c.tag[1:].isdigit():
+        headings.append(_render_component_to_text(c))
+
+    for child in c.children:
+        headings.extend(_extract_headings_from_component(child))
+
+    return headings
+
 CLUSTERS = {
     "All Content": [],
     "AI Builder": ["ai_builder"],
@@ -59,9 +98,14 @@ def __init__(self):
         self.client = typesense.Client(TYPESENSE_CONFIG)
 
     def smart_title_case(self, name: str) -> str:
-        def fix_word(word: str) -> str:
-            return word.upper() if word.upper() in ACRONYMS else word.capitalize()
-        return " ".join(fix_word(w) for w in name.split())
+        words = name.split(' ')
+        title_cased_words = []
+        for word in words:
+            if word.upper() in ACRONYMS:
+                title_cased_words.append(word.upper())
+            else:
+                title_cased_words.append(word.capitalize())
+        return " ".join(title_cased_words)
 
     def clean_name(self, name: str) -> str:
         if name.lower().endswith(".md"):
@@ -147,7 +191,7 @@ def process_doc_file(self, docs_path: str, file: str, root: str) -> Optional[dic
         for i, p in enumerate(parts):
             is_last = i == len(parts) - 1
             if is_last:
-                if filename_no_ext.endswith("-ll"):
+                if filename_no_ext.lower().endswith("-ll"):
                     parts_clean.append("Low Level")
                 else:
                     parts_clean.append(self.clean_name(filename_no_ext))
@@ -159,7 +203,7 @@ def process_doc_file(self, docs_path: str, file: str, root: str) -> Optional[dic
             url_parts[-1] = url_parts[-1].replace("-ll", "/low")
 
         url = "/" + "/".join(url_parts)
-        name = self.name_from_url(f"docs{url}")
+        name = " ".join(parts_clean)
 
         full_content = self.summarize_markdown(file_path, max_lines=100)
         components = self.extract_components(file_path)
@@ -187,6 +231,126 @@ def process_doc_file(self, docs_path: str, file: str, root: str) -> Optional[dic
187231 "parts" : parts_clean ,
188232 }
189233
+    def _index_programmatic_docs(self) -> List[dict]:
+        logger.info("Processing programmatic docs...")
+        documents = []
+
+        # Process API reference pages
+        for module in modules:
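+            # A tuple entry bundles a primary module with extra modules whose fields are merged in below.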
+            if isinstance(module, tuple):
+                module, *extra_modules = module
+                extra_fields = []
+                for extra_module in extra_modules:
+                    s_extra = Source(module=extra_module)
+                    extra_fields.extend(s_extra.get_fields())
+            else:
+                extra_fields = None
+            s = Source(module=module)
+            name = module.__name__.lower()
+
+            # Get the content from the source object directly
+            content_parts = []
+            headings = []
+
+            overview = s.get_overview()
+            if overview:
+                content_parts.append(overview)
+
+            class_fields = s.get_class_fields()
+            if class_fields:
+                content_parts.append("\n## Class Fields\n")
+                headings.append("Class Fields")
+                for field in class_fields:
+                    prop = field.get("prop")
+                    if not prop:
+                        continue
+                    prop_name = getattr(prop, 'name', '')
+                    description = field.get("description", "")
+                    content_parts.append(f"### {prop_name}\n{description}\n")
+                    headings.append(prop_name)
+
+            fields = s.get_fields()
+            if extra_fields:
+                fields.extend(extra_fields)
+            if fields:
+                content_parts.append("\n## Fields\n")
+                headings.append("Fields")
+                for field in fields:
+                    prop = field.get("prop")
+                    if not prop:
+                        continue
+                    prop_name = getattr(prop, 'name', '')
+                    description = field.get("description", "")
+                    content_parts.append(f"### {prop_name}\n{description}\n")
+                    headings.append(prop_name)
+
+            methods = s.get_methods()
+            if methods:
+                content_parts.append("\n## Methods\n")
+                headings.append("Methods")
+                for method in methods:
+                    method_name = method.get("name", "")
+                    signature = method.get("signature", "")
+                    description = method.get("description", "")
+                    content_parts.append(f"### {method_name}{signature}\n{description}\n")
+                    headings.append(f"{method_name}{signature}")
+
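+            # Join the collected sections into a single text blob for indexing.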
+            content = "\n".join(content_parts)
+
+            url_path = f"/api-reference/{name}"
+            title = self.name_from_url(f"docs{url_path}")
+            path = f"api-reference/{name}"
+
+            documents.append({
+                "id": path,
+                "title": title,
+                "content": self.clean_markdown(content),
+                "headings": headings,
+                "path": path,
+                "url": f"docs{url_path}",
+                "section": "API Reference",
+                "subsection": name,
+                "cluster": "API Reference",
+                "is_blog": False,
+                "parts": ["API Reference", title],
+                "components": [],
+            })
+
+        # Process Environment Variables page
+        env_var_url_path = "/api-reference/environment-variables"
+        env_var_title = self.name_from_url(f"docs{env_var_url_path}")
+        env_var_path = "api-reference/environment-variables"
+
+        all_vars = EnvVarDocs.get_all_env_vars()
+        content_parts = [
+            "Reflex provides a number of environment variables that can be used to configure the behavior of your application. These environment variables can be set in your shell environment or in a .env file. This page documents all available environment variables in Reflex."
+        ]
+        headings = ["Environment Variables"]
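+        # Skip internal variables; record each name with its docstring, type, and default.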
+        for name, var in all_vars:
+            if not getattr(var, "internal", False):
+                docstring = EnvVarDocs.get_env_var_docstring(name) or ""
+                var_type = var.type_.__name__ if hasattr(var.type_, "__name__") else str(var.type_)
+                content_parts.append(f"{var.name}: {docstring} (Type: {var_type}, Default: {var.default})")
+                headings.append(var.name)
+
+        content = "\n".join(content_parts)
+
+        documents.append({
+            "id": env_var_path,
+            "title": env_var_title,
+            "content": self.clean_markdown(content),
+            "headings": headings,
+            "path": env_var_path,
+            "url": f"docs{env_var_url_path}",
+            "section": "API Reference",
+            "subsection": "Environment Variables",
+            "cluster": "API Reference",
+            "is_blog": False,
+            "parts": ["API Reference", env_var_title],
+            "components": [],
+        })
+
+        logger.info(f"Found {len(documents)} programmatic docs.")
+        return documents
+
     def extract_frontmatter(self, md_path: str) -> dict:
         """Your existing frontmatter extraction"""
         with open(md_path, "r", encoding="utf-8") as f:
@@ -263,6 +427,8 @@ def create_collection(self, force_recreate: bool = False) -> bool:
     def index_documents(self, docs_path: str, blog_path: str, max_workers: int = 4, batch_size: int = 100) -> bool:
         """Index both docs and blog files"""
         try:
+            programmatic_docs = self._index_programmatic_docs()
+
             docs_files = []
             for root, _, files in os.walk(docs_path):
                 for file in files:
@@ -279,7 +445,7 @@ def index_documents(self, docs_path: str, blog_path: str, max_workers: int = 4,
             all_files = docs_files + blog_files
             logger.info(f"Found {len(docs_files)} docs and {len(blog_files)} blog files")
 
-            documents = []
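+            # Start from the programmatically generated docs so they are indexed along with the first batch.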
+            documents = programmatic_docs
             processed = 0
 
             with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -297,11 +463,11 @@ def index_documents(self, docs_path: str, blog_path: str, max_workers: int = 4,
                     if len(documents) >= batch_size:
                         self._index_batch(documents)
                         documents = []
-                        logger.info(f"Processed {processed}/{len(all_files)} files")
+                        logger.info(f"Processed {processed}/{len(all_files)} files (plus programmatic docs)")
 
             if documents:
                 self._index_batch(documents)
-                logger.info(f"Processed {processed}/{len(all_files)} files")
+                logger.info(f"Processed {processed}/{len(all_files)} files (plus programmatic docs)")
 
             logger.info("Indexing completed successfully!")
             return True