Skip to content

Commit 6dbf962

Browse files
authored
Support Ruby procedural modifications (#231)
* Add 20 Ruby procedural bug modifiers with property analysis

  Tree-sitter-based modifiers across 5 modules: control_flow (if/else invert, shuffle lines, guard clause invert), nil_introduction (safe navigation, or-default, presence, bang method, or-equals, nil guard), operations (change, flip, swap operands, break chains, change constants), remove (loops, conditionals, assignments, rescue/ensure), and ruby_specific (symbol/string swap, block mutation). Includes a base class with a _remove_matching_nodes helper, and Ruby adapter property analysis.

* Add 22 Ruby repo profiles with RSpec and Minitest log parsers

  Profiles for Faker, RuboCop, Jekyll, and 19 new repos, including 3 SWE-bench_Multilingual eval_set repos (fluentd, fastlane, fpm). RSpec JSON and Minitest/test-unit log parsers with auto-detection. RSpec test path detection (_is_test_path override for spec/ and _spec.rb).
1 parent 9f2ba94 commit 6dbf962

21 files changed

+3654
-1
lines changed

scripts/bug_gen_modal.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ def custom_exception_handler(loop, context):
133133
"typescript": "TypeScriptProfile",
134134
"golang": "GoProfile",
135135
"go": "GoProfile",
136+
"ruby": "RubyProfile",
136137
"rust": "RustProfile",
137138
"java": "JavaProfile",
138139
"c": "CProfile",

swesmith/bug_gen/adapters/ruby.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from swesmith.constants import TODO_REWRITE, CodeEntity
1+
from swesmith.constants import TODO_REWRITE, CodeEntity, CodeProperty
22
from tree_sitter import Language, Parser, Query, QueryCursor
33
import tree_sitter_ruby as tsr
44
import warnings
@@ -100,6 +100,63 @@ def walk(node) -> int:
100100

101101
return 1 + walk(self.node)
102102

103+
def _analyze_properties(self):
    """Populate ``self._tags`` with the code properties of this Ruby entity.

    Tags the entity as a function when its root node is a ``method`` or
    ``singleton_method``, then walks the whole subtree to collect the
    remaining property tags.
    """
    root = self.node
    is_method = root.type in ("method", "singleton_method")
    if is_method:
        self._tags.add(CodeProperty.IS_FUNCTION)
    # The subtree walk runs for every entity, not only for methods.
    self._walk_for_properties(root)
109+
110+
def _walk_for_properties(self, n):
111+
"""Walk the AST and analyze properties."""
112+
self._check_control_flow(n)
113+
self._check_operations(n)
114+
self._check_expressions(n)
115+
for child in n.children:
116+
self._walk_for_properties(child)
117+
118+
def _check_control_flow(self, n):
    """Tag conditional, loop and exception-handling constructs found at ``n``."""
    kind = n.type

    if kind in ("if", "unless", "if_modifier", "unless_modifier"):
        self._tags.add(CodeProperty.HAS_IF)
        # Only the block forms can carry an else/elsif branch.
        if kind in ("if", "unless"):
            has_alternative = any(
                branch.type in ("else", "elsif") for branch in n.children
            )
            if has_alternative:
                self._tags.add(CodeProperty.HAS_IF_ELSE)

    if kind in ("while", "until", "for", "while_modifier", "until_modifier"):
        self._tags.add(CodeProperty.HAS_LOOP)

    if kind in ("rescue", "ensure"):
        self._tags.add(CodeProperty.HAS_EXCEPTION)
130+
131+
def _check_operations(self, n):
    """Tag indexing, calls, returns, assignments and blocks/lambdas at ``n``."""
    kind = n.type
    # A node has exactly one type, so at most one branch applies.
    if kind in ("element_reference", "element_assignment"):
        self._tags.add(CodeProperty.HAS_LIST_INDEXING)
    elif kind == "call":
        self._tags.add(CodeProperty.HAS_FUNCTION_CALL)
    elif kind == "return":
        self._tags.add(CodeProperty.HAS_RETURN)
    elif kind in ("assignment", "operator_assignment"):
        self._tags.add(CodeProperty.HAS_ASSIGNMENT)
    elif kind in ("lambda", "block", "do_block"):
        self._tags.add(CodeProperty.HAS_LAMBDA)
143+
144+
def _check_expressions(self, n):
    """Tag binary, boolean, comparison, unary and ternary expressions at ``n``."""
    kind = n.type
    if kind == "binary":
        self._tags.add(CodeProperty.HAS_BINARY_OP)
        # Inspect each child's source text to classify the operator.
        for part in n.children:
            if not hasattr(part, "text"):
                continue
            token = part.text.decode("utf-8")
            if token in ("&&", "||", "and", "or"):
                self._tags.add(CodeProperty.HAS_BOOL_OP)
            elif token in ("<", ">", "<=", ">="):
                self._tags.add(CodeProperty.HAS_OFF_BY_ONE)
    elif kind == "unary":
        self._tags.add(CodeProperty.HAS_UNARY_OP)
    elif kind == "conditional":  # Ruby ternary: cond ? a : b
        self._tags.add(CodeProperty.HAS_TERNARY)
159+
103160

104161
def get_entities_from_file_rb(
105162
entities: list[RubyEntity],

swesmith/bug_gen/procedural/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from swesmith.bug_gen.procedural.java import MODIFIERS_JAVA
1313
from swesmith.bug_gen.procedural.javascript import MODIFIERS_JAVASCRIPT
1414
from swesmith.bug_gen.procedural.python import MODIFIERS_PYTHON
15+
from swesmith.bug_gen.procedural.ruby import MODIFIERS_RUBY
1516
from swesmith.bug_gen.procedural.rust import MODIFIERS_RUST
1617
from swesmith.bug_gen.procedural.typescript import MODIFIERS_TYPESCRIPT
1718

@@ -25,6 +26,7 @@
2526
".hpp": MODIFIERS_CPP,
2627
".js": MODIFIERS_JAVASCRIPT,
2728
".py": MODIFIERS_PYTHON,
29+
".rb": MODIFIERS_RUBY,
2830
".rs": MODIFIERS_RUST,
2931
".ts": MODIFIERS_TYPESCRIPT,
3032
".tsx": MODIFIERS_TYPESCRIPT,
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from swesmith.bug_gen.procedural.base import ProceduralModifier
2+
from swesmith.bug_gen.procedural.ruby.control_flow import (
3+
ControlIfElseInvertModifier,
4+
ControlShuffleLinesModifier,
5+
GuardClauseInvertModifier,
6+
)
7+
from swesmith.bug_gen.procedural.ruby.nil_introduction import (
8+
BangMethodStripModifier,
9+
NilGuardRemovalModifier,
10+
OrDefaultRemovalModifier,
11+
OrEqualsRemovalModifier,
12+
PresenceStripModifier,
13+
SafeNavigationRemovalModifier,
14+
)
15+
from swesmith.bug_gen.procedural.ruby.operations import (
16+
OperationBreakChainsModifier,
17+
OperationChangeConstantsModifier,
18+
OperationChangeModifier,
19+
OperationFlipOperatorModifier,
20+
OperationSwapOperandsModifier,
21+
)
22+
from swesmith.bug_gen.procedural.ruby.remove import (
23+
RemoveAssignModifier,
24+
RemoveConditionalModifier,
25+
RemoveLoopModifier,
26+
RemoveRescueEnsureModifier,
27+
)
28+
from swesmith.bug_gen.procedural.ruby.ruby_specific import (
29+
BlockMutationModifier,
30+
SymbolStringSwapModifier,
31+
)
32+
33+
# Registry of Ruby procedural modifiers; each instance is created with its own
# ``likelihood`` value.
MODIFIERS_RUBY: list[ProceduralModifier] = [
    # --- Standard modifiers (CommonPMs) ---
    ControlIfElseInvertModifier(likelihood=0.75),
    ControlShuffleLinesModifier(likelihood=0.75),
    OperationChangeModifier(likelihood=0.4),
    OperationFlipOperatorModifier(likelihood=0.4),
    OperationSwapOperandsModifier(likelihood=0.4),
    OperationBreakChainsModifier(likelihood=0.3),
    OperationChangeConstantsModifier(likelihood=0.4),
    RemoveAssignModifier(likelihood=0.25),
    RemoveConditionalModifier(likelihood=0.25),
    RemoveLoopModifier(likelihood=0.25),
    # --- Ruby-specific ---
    GuardClauseInvertModifier(likelihood=0.6),
    RemoveRescueEnsureModifier(likelihood=0.4),
    BlockMutationModifier(likelihood=0.4),
    SymbolStringSwapModifier(likelihood=0.5),
    # --- Nil introduction ---
    SafeNavigationRemovalModifier(likelihood=0.5),
    OrDefaultRemovalModifier(likelihood=0.4),
    PresenceStripModifier(likelihood=0.5),
    BangMethodStripModifier(likelihood=0.4),
    OrEqualsRemovalModifier(likelihood=0.4),
    NilGuardRemovalModifier(likelihood=0.5),
]
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from abc import ABC
2+
3+
import tree_sitter_ruby as tsruby
4+
from tree_sitter import Language, Parser
5+
6+
from swesmith.bug_gen.procedural.base import ProceduralModifier
7+
from swesmith.constants import BugRewrite, CodeEntity
8+
9+
# tree-sitter Language object for the Ruby grammar, built once at import time.
RUBY_LANGUAGE = Language(tsruby.language())
10+
11+
12+
class RubyProceduralModifier(ProceduralModifier, ABC):
    """Base class for Ruby-specific procedural modifications.

    Collects the tree-sitter helpers shared by the Ruby modifiers: syntax
    validation of rewritten code, node search, byte-accurate node
    replacement, and randomized removal of whole AST nodes.
    """

    @staticmethod
    def validate_syntax(original: str, modified: str) -> bool | None:
        """Check that ``modified`` still parses as Ruby.

        Args:
            original: Source text before the modification.
            modified: Source text after the modification.

        Returns:
            ``None`` if the text is unchanged, ``True`` if ``modified``
            parses without syntax errors, ``False`` otherwise.
        """
        if original == modified:
            return None
        parser = Parser(RUBY_LANGUAGE)
        tree = parser.parse(bytes(modified, "utf8"))
        # A token that tree-sitter inserts during error recovery keeps the
        # type of the token it stands in for and is flagged via
        # ``is_missing``; its ``type`` is never the string "MISSING".
        # Comparing type names therefore only detects ERROR nodes.
        # ``has_error`` reports any syntax error in the subtree (ERROR and
        # missing nodes alike) without Python-level recursion.
        return not tree.root_node.has_error

    @staticmethod
    def find_nodes(node, *types) -> list:
        """Find all AST nodes under ``node`` matching any of the given types.

        Nodes are returned in pre-order (document order); ``node`` itself is
        included when it matches.

        Note: tree-sitter Ruby reuses the type name for both compound
        statement nodes and their keyword tokens (e.g. ``while`` appears
        as both the loop node and its keyword child). Callers searching
        for compound statements like ``while``, ``if``, ``rescue`` etc.
        should filter out leaf nodes (``n.child_count == 0``) to avoid
        matching bare keywords.

        Args:
            node: Root of the subtree to search.
            *types: Node type names to match.

        Returns:
            List of matching nodes.
        """
        results = []
        # Explicit stack instead of recursion so deeply nested trees cannot
        # exhaust the interpreter's recursion limit.
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type in types:
                results.append(current)
            # Reversed so the leftmost child is popped (visited) first.
            pending.extend(reversed(current.children))
        return results

    @staticmethod
    def replace_node(code: str, node, replacement: str) -> str:
        """Replace a tree-sitter node's text via byte offsets.

        Args:
            code: Source text the node was parsed from.
            node: Node whose ``start_byte``/``end_byte`` span is replaced.
            replacement: Text to put in place of the node.

        Returns:
            The source text with the node's span replaced.
        """
        # Node offsets are byte offsets, so splice on the UTF-8 encoding
        # rather than on the str (which would break on multi-byte chars).
        code_bytes = code.encode("utf8")
        new_bytes = (
            code_bytes[: node.start_byte]
            + replacement.encode("utf8")
            + code_bytes[node.end_byte :]
        )
        return new_bytes.decode("utf8")

    def _remove_matching_nodes(
        self, code_entity: CodeEntity, *node_types: str, validate: bool = False
    ) -> BugRewrite | None:
        """Remove AST nodes matching the given types from the source code.

        ``self.flip()`` is consulted once up front and once per candidate
        node, so only a random subset of the matching nodes is removed.

        Args:
            code_entity: Entity whose ``src_code`` is rewritten.
            *node_types: tree-sitter node type names eligible for removal.
            validate: When true, discard the rewrite unless it is changed
                and parses cleanly (see ``validate_syntax``). When false,
                only an unchanged result is discarded.

        Returns:
            A ``BugRewrite`` carrying the modified source, or ``None`` when
            nothing was removed or the result was rejected.
        """
        if not self.flip():
            return None

        parser = Parser(RUBY_LANGUAGE)
        tree = parser.parse(bytes(code_entity.src_code, "utf8"))

        removals = []

        def collect(n):
            # Keyword tokens (e.g. `while` inside while_modifier) are leaf
            # nodes in tree-sitter; compound statements always have children.
            if n.type in node_types and n.children and self.flip():
                removals.append(n)
                return  # skip children to avoid stale byte offsets on nested removals
            for child in n.children:
                collect(child)

        collect(tree.root_node)

        if not removals:
            return None

        source_bytes = code_entity.src_code.encode("utf8")
        # Cut from the end of the file backwards so earlier offsets stay valid.
        for node in sorted(removals, key=lambda x: x.start_byte, reverse=True):
            source_bytes = (
                source_bytes[: node.start_byte] + source_bytes[node.end_byte :]
            )

        modified_code = source_bytes.decode("utf8")

        if validate:
            # validate_syntax returns None (unchanged) or False (syntax
            # errors); both reject the rewrite.
            valid = self.validate_syntax(code_entity.src_code, modified_code)
            if not valid:
                return None
        elif modified_code == code_entity.src_code:
            return None

        return BugRewrite(
            rewrite=modified_code,
            explanation=self.explanation,
            strategy=self.name,
        )

0 commit comments

Comments
 (0)