@@ -332,80 +332,104 @@ def check_assay_outcome_pairing(sheets: list[SheetData]) -> list[QCIssue]:
332332 issues : list [QCIssue ] = []
333333
334334 for sheet in sheets :
335- # Only process sheets that declare an "assay outcome" column.
336- # This avoids mis-parsing files with different schemas (e.g., metpo_sheet.tsv).
337- header_cells : list [str ] = []
338- for header_row in sheet .rows [:2 ]:
339- for cell in header_row :
340- if cell :
341- header_cells .append (str (cell ).strip ().lower ())
342- if not any ("assay outcome" in cell for cell in header_cells ):
335+ if not _sheet_has_assay_outcome_column (sheet ):
343336 continue
344337
345- # synonym column (index 9) and assay outcome column (index 11 )
346- synonym_map : dict [ str , list [ tuple [ int , str , str , str ]]] = defaultdict ( list )
338+ synonym_map = _extract_synonym_map_for_assay_outcomes ( sheet )
339+ issues . extend ( _build_assay_outcome_issues ( sheet , synonym_map ) )
347340
348- for row_num , row in enumerate (sheet .rows , start = 1 ):
349- if row_num <= 2 :
350- continue
351- if len (row ) < 12 :
352- continue
341+ return issues
353342
354- row_id = row [0 ].strip () if row [0 ] else ""
355- label = row [1 ].strip () if len (row ) > 1 and row [1 ] else ""
356- synonym_tuples = row [9 ].strip () if len (row ) > 9 and row [9 ] else ""
357- assay_outcome = row [11 ].strip () if len (row ) > 11 and row [11 ] else ""
358343
359- if not (row_id and synonym_tuples and assay_outcome ):
360- continue
def _sheet_has_assay_outcome_column(sheet: SheetData) -> bool:
    """Report whether the sheet's header rows mention an "assay outcome" column.

    Only the first two rows of ``sheet.rows`` are inspected. Falsy cells are
    skipped; every other cell is stringified, stripped, and lower-cased
    before a substring match against ``"assay outcome"``.
    """
    return any(
        "assay outcome" in str(value).strip().lower()
        for line in sheet.rows[:2]
        for value in line
        if value
    )
361352
362- # Extract synonym string from tuple like "oboInOwl:hasRelatedSynonym 'fermentation'"
363- match = re .search (r"'([^']+)'" , synonym_tuples )
364- if match :
365- synonym = match .group (1 )
366- synonym_map [synonym ].append ((row_num , row_id , label , assay_outcome ))
367353
368- for synonym , entries in synonym_map .items ():
369- outcomes = [e [3 ] for e in entries ]
def _extract_synonym_map_for_assay_outcomes(
    sheet: SheetData,
) -> dict[str, list[tuple[int, str, str, str]]]:
    """Group a sheet's data rows by the synonym quoted in their synonym cell.

    The first two rows are treated as headers and skipped. A row is also
    skipped when it has fewer than 12 cells, or when its ID (index 0),
    synonym cell (index 9), or assay outcome (index 11) is empty after
    stripping. The synonym is the first single-quoted substring of the
    synonym cell, e.g. ``fermentation`` in
    ``oboInOwl:hasRelatedSynonym 'fermentation'``; rows whose synonym cell
    contains no quoted text are ignored.

    Args:
        sheet: Sheet whose ``rows`` are scanned.

    Returns:
        A plain ``dict`` mapping each synonym to a list of
        ``(row_num, id, label, assay_outcome)`` tuples, where ``row_num`` is
        the 1-based position of the row in ``sheet.rows``.
    """

    def _text(cell: object) -> str:
        # Coerce through str() so a non-string cell cannot crash .strip();
        # falsy cells (None, "", 0) count as empty, as they did before.
        return str(cell).strip() if cell else ""

    # Compile once instead of going through re.search's cache on every row.
    quoted = re.compile(r"'([^']+)'")
    grouped: defaultdict[str, list[tuple[int, str, str, str]]] = defaultdict(list)

    for row_num, row in enumerate(sheet.rows, start=1):
        # The length guard makes indices 0, 1, 9 and 11 safe below.
        if row_num <= 2 or len(row) < 12:
            continue

        row_id = _text(row[0])
        label = _text(row[1])
        synonym_tuples = _text(row[9])
        assay_outcome = _text(row[11])

        if not (row_id and synonym_tuples and assay_outcome):
            continue

        match = quoted.search(synonym_tuples)
        if match:
            grouped[match.group(1)].append((row_num, row_id, label, assay_outcome))

    # Return a plain dict so a later lookup of an unknown synonym raises
    # KeyError instead of silently inserting an empty list.
    return dict(grouped)
379+
380+
def _build_assay_outcome_issues(
    sheet: SheetData,
    synonym_map: dict[str, list[tuple[int, str, str, str]]],
) -> list[QCIssue]:
    """Turn grouped synonym entries into assay-outcome QC issues.

    Each value in ``synonym_map`` is a list of
    ``(row_num, id, label, assay_outcome)`` tuples sharing one synonym:

    * one entry whose outcome is ``+`` or ``-`` -> ``UNPAIRED_ASSAY_OUTCOME``
      warning;
    * two entries with the same outcome -> ``ASSAY_OUTCOME_MISMATCH`` error;
    * more than two entries -> ``MULTIPLE_ASSAY_OUTCOMES`` warning.

    Every other combination produces no issue.
    """
    found: list[QCIssue] = []

    for synonym, records in synonym_map.items():
        total = len(records)

        if total == 1:
            line_no, prop_id, prop_label, result = records[0]
            # A lone entry is fine for parent properties (e.g. enzyme activity
            # analyzed); only a bare +/- property is missing its counterpart.
            if result in ("+", "-"):
                found.append(
                    QCIssue(
                        "WARNING",
                        "UNPAIRED_ASSAY_OUTCOME",
                        f"Synonym '{synonym}' has only one property with outcome "
                        f"'{result}': {prop_id} ({prop_label}). Expected a +/- pair.",
                        f"{sheet.filename}: row {line_no}",
                    )
                )
        elif total == 2:
            (line_a, id_a, label_a, result_a), (line_b, id_b, label_b, result_b) = records
            if result_a == result_b:
                found.append(
                    QCIssue(
                        "ERROR",
                        "ASSAY_OUTCOME_MISMATCH",
                        f"Synonym '{synonym}' has two properties with same outcome "
                        f"'{result_a}': {id_a} ({label_a}) and {id_b} ({label_b}). "
                        f"One should be '+' and the other '-'.",
                        f"{sheet.filename}: rows {line_a}, {line_b}",
                    )
                )
        elif total > 2:
            listing = ", ".join(
                f"{prop_id} ({prop_label}, {result})"
                for _, prop_id, prop_label, result in records
            )
            found.append(
                QCIssue(
                    "WARNING",
                    "MULTIPLE_ASSAY_OUTCOMES",
                    f"Synonym '{synonym}' has {total} properties: {listing}",
                    sheet.filename,
                )
            )

    return found
411435
0 commit comments