@@ -57,25 +57,49 @@ def normalize(modality: str) -> str:
5757
def extract_modalities_from_markdown(md_text: str) -> List[str]:
    """Collect normalized modality names from the project tables in *md_text*.

    Every Markdown table row outside the generated modality-counts section
    (the lines between the ``<!-- START: modality-counts -->`` and
    ``<!-- END: modality-counts -->`` markers) is inspected.  The modality
    cell of each data row is split on commas and each piece is passed through
    ``normalize``; falsy results are dropped.

    The modality column is located by name when the table has a header cell
    reading "Modality" (case-insensitive).  Otherwise it is assumed to be the
    second visible column, matching the ``Project | Modality | Repo | Date``
    layout.

    Args:
        md_text: Full Markdown document text.

    Returns:
        Normalized modality names in document order, duplicates included.
    """
    counts_start = "<!-- START: modality-counts -->"
    counts_end = "<!-- END: modality-counts -->"
    default_modality_index = 1  # second visible column
    # Project and Modality plus at least one more column; narrower tables
    # are not treated as project lists.
    min_columns = 3

    # Compiled once, outside the per-line loop.
    separator_cell = re.compile(r":?-+:?")  # "---", ":---", "---:", ":---:"
    comma = re.compile(r"\s*,\s*")

    modalities: List[str] = []
    in_counts_section = False
    modality_index = default_modality_index

    for raw_line in md_text.splitlines():
        # Skip the modality count table itself.
        if counts_start in raw_line:
            in_counts_section = True
            continue
        if counts_end in raw_line:
            in_counts_section = False
            continue
        if in_counts_section:
            continue

        line = raw_line.strip()
        if not line.startswith("|"):
            # A non-table line ends the current table, so a column index
            # learned from its header no longer applies.
            modality_index = default_modality_index
            continue

        # Drop only the outer pipes and keep empty cells, so that column
        # positions stay stable when a cell is blank.
        inner = line[1:]
        if inner.endswith("|"):
            inner = inner[:-1]
        cells = [cell.strip() for cell in inner.split("|")]

        if len(cells) < min_columns:
            continue
        if separator_cell.fullmatch(cells[0]):
            continue  # header/body separator row

        lowered = [cell.lower() for cell in cells]
        if "modality" in lowered:
            # Header row: remember where the modality column sits.
            modality_index = lowered.index("modality")
            continue
        if lowered[0] == "project":
            continue  # header row without a named modality column

        if modality_index >= len(cells):
            continue
        modality_cell = cells[modality_index]
        if not modality_cell:
            continue

        # A single cell may list several modalities separated by commas.
        for part in comma.split(modality_cell):
            normalized = normalize(part)
            if normalized:
                modalities.append(normalized)

    return modalities
80104
81105def build_table (counts : Counter ) -> str :
0 commit comments