@@ -1215,3 +1215,104 @@ def test_multiple_occurrence_handling(self, annotator):
12151215 cat_spans = [s for s in spans if s .text .lower () == "cat" ]
12161216 assert len (cat_spans ) == 1
12171217 assert cat_spans [0 ].span == (4 , 6 ) # "cat" position
1218+
1219+ def test_production_parentheses_property_error_august_2025 (self , annotator ):
1220+ """Test property name error with parentheses that caused sequence 10 failure."""
1221+
1222+ # This is the exact pattern that failed in sequence 10 of the pipeline run
1223+ # Error: "Expecting property name enclosed in double quotes: line 10 column 15 (char 361)"
1224+ # The issue appears to be with {"text":"(" character patterns
1225+ parentheses_error_pattern = '''[
1226+ {"text":"span","xbar_label":"noun"},
1227+ {"text":"is","xbar_label":"verb"},
1228+ {"text":"scored","xbar_label":"verb"},
1229+ {"text":"by","xbar_label":"preposition"},
1230+ {"text":"a","xbar_label":"determiner"},
1231+ {"text":"parameterized","xbar_label":"adjective"},
1232+ {"text":"function","xbar_label":"keyword"},
1233+ {"text":"fe","xbar_label":"identifier"},
1234+ {"text":"(",xbar_label":"operator"},
1235+ {"text":"w","xbar_label":"literal"}
1236+ ]'''
1237+
1238+ # This should either succeed after repair or handle the error gracefully
1239+ try :
1240+ annotations = annotator .json_parser .parse_json_response (parentheses_error_pattern )
1241+ assert isinstance (annotations , list )
1242+ # If successful, should have parsed some annotations
1243+ assert len (annotations ) >= 0
1244+ # Should handle the parentheses character properly
1245+ paren_spans = [a for a in annotations if a .get ("text" ) == "(" ]
1246+ if paren_spans :
1247+ assert paren_spans [0 ]["xbar_label" ] == "operator"
1248+ except ValueError as e :
1249+ # If repair fails, should provide a meaningful error message
1250+ assert "JSON" in str (e ) or "property name" in str (e )
1251+ logger .warning (f"Parentheses pattern failed to repair: { e } " )
1252+
1253+ def test_exact_sequence_10_error_pattern_august_2025 (self , annotator ):
1254+ """Test the exact error pattern from sequence 10 that caused pipeline failure."""
1255+
1256+ # Based on the console output, this appears to be the pattern at char 361
1257+ exact_error_pattern = '''[
1258+ {"text":"span","xbar_label":"noun"},
1259+ {"text":"is","xbar_label":"verb"},
1260+ {"text":"scored","xbar_label":"verb"},
1261+ {"text":"by","xbar_label":"preposition"},
1262+ {"text":"a","xbar_label":"determiner"},
1263+ {"text":"parameterized","xbar_label":"adjective"},
1264+ {"text":"function","xbar_label":"keyword"},
1265+ {"text":"fe","xbar_label":"identifier"},
1266+ {"text":"(",xbar_label:"operator"},
1267+ {"text":"w","xbar_label":"literal"}
1268+ ]'''
1269+
1270+ # This exact pattern should now be repairable with our enhanced fix
1271+ annotations = annotator .json_parser .parse_json_response (exact_error_pattern )
1272+ assert isinstance (annotations , list )
1273+ assert len (annotations ) >= 8 # Should parse most annotations even if some fail
1274+
1275+ # Specifically check that the parentheses annotation can be handled
1276+ paren_spans = [a for a in annotations if a .get ("text" ) == "(" ]
1277+ if paren_spans :
1278+ assert paren_spans [0 ]["xbar_label" ] == "operator"
1279+
1280+ def test_production_malformed_parentheses_variants_august_2025 (self , annotator ):
1281+ """Test various malformed parentheses patterns that might occur."""
1282+
1283+ # Pattern 1: Missing closing quote before parentheses
1284+ pattern1 = '''[
1285+ {"text":"function","xbar_label":"keyword"},
1286+ {"text":"fe","xbar_label":"identifier"},
1287+ {"text":"(",xbar_label":"operator"}
1288+ ]'''
1289+
1290+ # Pattern 2: Missing quote after parentheses
1291+ pattern2 = '''[
1292+ {"text":"function","xbar_label":"keyword"},
1293+ {"text":"(","xbar_label":"operator"},
1294+ {"text":"value","xbar_label":"literal"}
1295+ ]'''
1296+
1297+ # Pattern 3: Both opening and closing parentheses
1298+ pattern3 = '''[
1299+ {"text":"function","xbar_label":"keyword"},
1300+ {"text":"(","xbar_label":"operator"},
1301+ {"text":")","xbar_label":"operator"}
1302+ ]'''
1303+
1304+ patterns = [pattern1 , pattern2 , pattern3 ]
1305+
1306+ for i , pattern in enumerate (patterns ):
1307+ try :
1308+ annotations = annotator .json_parser .parse_json_response (pattern )
1309+ assert isinstance (annotations , list )
1310+ # Should handle parentheses characters if repaired successfully
1311+ paren_spans = [a for a in annotations if a .get ("text" ) in ["(" , ")" ]]
1312+ if paren_spans :
1313+ assert all (a ["xbar_label" ] == "operator" for a in paren_spans )
1314+ logger .info (f"Pattern { i + 1 } successfully parsed with { len (annotations )} annotations" )
1315+ except ValueError as e :
1316+ # Some patterns may not be repairable
1317+ logger .warning (f"Pattern { i + 1 } failed: { e } " )
1318+ pass
0 commit comments