@@ -63,15 +63,48 @@ def _parse_sections(self) -> Dict[str, str]:
6363 if current_section :
6464 sections [current_section ] = '\n ' .join (current_content )
6565
66+ # Also parse individual container images as separate sections
67+ # Find the Container Images section and extract individual image docs
68+ container_images_match = re .search (r'^## Container Images\n' , self .content , re .MULTILINE )
69+ if container_images_match :
70+ start_pos = container_images_match .end ()
71+ section_content = self .content [start_pos :]
72+
73+ # Use regex to find all image sections (### header followed by content until next separator or end)
74+ # Pattern: ### image-name followed by content until <!-- IMAGE_SEPARATOR --> or end of string
75+ image_pattern = r'### ([a-z0-9\-]+)\n(.*?)(?=\n<!-- IMAGE_SEPARATOR -->|\Z)'
76+ for match in re .finditer (image_pattern , section_content , re .DOTALL ):
77+ image_name = match .group (1 )
78+ image_content = f'### { image_name } \n { match .group (2 ).strip ()} '
79+ sections [f'image:{ image_name } ' ] = image_content
80+
6681 return sections
6782
6883 def _extract_images (self ) -> List [str ]:
6984 """Extract list of container image names from docs."""
7085 images = set ()
71- # Look for image references like cgr.dev/chainguard/python
72- pattern = r'cgr\.dev/chainguard/([a-z0-9\-]+)'
73- for match in re .finditer (pattern , self .content ):
74- images .add (match .group (1 ))
86+
87+ # Find the "## Container Images" section and extract all ### headers with simple names
88+ container_images_match = re .search (r'^## Container Images\n' , self .content , re .MULTILINE )
89+
90+ if container_images_match :
91+ # Start from the Container Images section
92+ start_pos = container_images_match .end ()
93+ # Get content from Container Images section to the end
94+ section_content = self .content [start_pos :]
95+
96+ # Extract all ### headers that match image name pattern
97+ # Image names are simple: lowercase letters, numbers, and hyphens
98+ image_pattern = r'\n### ([a-z0-9\-]+)\n'
99+ for match in re .finditer (image_pattern , section_content ):
100+ images .add (match .group (1 ))
101+
102+ # Fallback: also look for cgr.dev references if no images found from headers
103+ if not images :
104+ pattern = r'cgr\.dev/chainguard/([a-z0-9\-]+)'
105+ for match in re .finditer (pattern , self .content ):
106+ images .add (match .group (1 ))
107+
75108 return sorted (list (images ))
76109
77110 def search (self , query : str , max_results : int = 5 ) -> List [Dict [str , str ]]:
@@ -107,14 +140,17 @@ def search(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
107140
def get_image_docs(self, image_name: str) -> Optional[str]:
    """Get documentation for a specific container image.

    Lookup order:
      1. The ``image:<name>`` entry of ``self.sections``, tried first with
         the name as given and then with it stripped and lowercased.
      2. The first ``image:`` section whose key contains the normalized name
         as a substring (partial-name match).
      3. The ``full_content`` of the top ``self.search`` hit for the name.

    Args:
        image_name: Name of the image to look up.

    Returns:
        The matching documentation text when one of the steps above
        succeeds.
    """
    normalized = image_name.strip().lower()

    # Exact match first.  The normalized key is tried as well so that a
    # request such as "Python" resolves to "image:python" instead of falling
    # through to the substring scan, which could hit "image:python-dev".
    for key in (f'image:{image_name}', f'image:{normalized}'):
        if key in self.sections:
            return self.sections[key]

    # Partial-name fallback.  Skipped for an empty name: the empty string is
    # a substring of every key and would return an arbitrary image section.
    if normalized:
        for section_name, section_content in self.sections.items():
            if section_name.startswith('image:') and normalized in section_name.lower():
                return section_content

    # Last resort: search for any mention
    results = self.search(image_name, max_results=1)
    if results:
        return results[0]['full_content']
0 commit comments