Skip to content
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
3a41a8c
Add 20 Ruby procedural bug modifiers with property analysis
cosgroveb Mar 3, 2026
447d254
Add 22 Ruby repo profiles with RSpec and Minitest log parsers
cosgroveb Mar 3, 2026
55dc8b3
Add tests for Ruby modifiers, property analysis, and profiles
cosgroveb Mar 3, 2026
3ebac51
Fix operator precedence in test-unit parser and nested node removal
cosgroveb Mar 3, 2026
676a6eb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 3, 2026
7a1fd92
Add 26 tests covering flip-failure, no-candidate, and edge-case paths
cosgroveb Mar 3, 2026
5a09425
Fix malformed JSON test to actually hit JSONDecodeError path
cosgroveb Mar 3, 2026
26811ce
Assert all property tags for parse_query and _normalize_params
cosgroveb Mar 4, 2026
49b465d
Assert both directions of if/else swap in invert test
cosgroveb Mar 4, 2026
f805798
Pad no-else test method to meet min_complexity threshold
cosgroveb Mar 4, 2026
fad3a37
Add unless/else invert test for ControlIfElseInvertModifier
cosgroveb Mar 4, 2026
9713734
Add test for OrDefaultRemovalModifier with Ruby `or` keyword
cosgroveb Mar 4, 2026
458d0f1
Remove find_sole_by! from BANG_METHODS — no bang version exists
cosgroveb Mar 4, 2026
2e7b9ba
Add parametrized guard removal tests for unless and raise keywords
cosgroveb Mar 4, 2026
744f43d
Add missing self.flip() to ControlShuffleLinesModifier
cosgroveb Mar 4, 2026
80b32f4
Add shuffle test for singleton_method (def self.foo) nodes
cosgroveb Mar 5, 2026
48eceef
Simplify safe navigation check to just child.type
cosgroveb Mar 5, 2026
255ab80
Add ** to ARITHMETIC_OPS, remove redundant ALL_BINARY_OPS entry
cosgroveb Mar 5, 2026
c8cc206
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 5, 2026
4e12735
Add OperationChangeModifier tests for comparison, logical, and keywor…
cosgroveb Mar 5, 2026
ea0c9c7
Add regexp operators =~ and !~ to FLIPPED_OPERATORS and REGEX_OPS
cosgroveb Mar 5, 2026
9d70cb7
Add range operator flipping between inclusive .. and exclusive ...
cosgroveb Mar 5, 2026
467eb5c
Update ChangeConstants docstring to reflect float handling
cosgroveb Mar 5, 2026
8e73e2c
Unroll parametrized remove test into standalone functions
cosgroveb Mar 5, 2026
e4050e1
Add remove tests for until and for loop types
cosgroveb Mar 5, 2026
ec28bf3
Add remove test for unless conditional
cosgroveb Mar 5, 2026
ee6b43e
Add ensure block to rescue removal test
cosgroveb Mar 5, 2026
0b872ae
Extract regex pattern to _RUBY_IDENTIFIER_PATTERN constant
cosgroveb Mar 5, 2026
6e4ab58
Filter then keyword from then_stmts to fix if/then/else inversion
cosgroveb Mar 5, 2026
8f15d2a
Replace find! with take! in BANG_METHODS — find already raises
cosgroveb Mar 6, 2026
fc85b17
Fix nested binary corruption in OperationSwapOperandsModifier
cosgroveb Mar 6, 2026
a525ee9
Skip keyword leaf nodes in _remove_matching_nodes
cosgroveb Mar 6, 2026
dc69a94
Add regression test for rescue keyword-only deletion
cosgroveb Mar 6, 2026
8598c92
Exclude conditional contexts from OrDefaultRemovalModifier
cosgroveb Mar 6, 2026
7805146
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 6, 2026
6e6dc9a
Check immediate parent only in OrDefaultRemovalModifier conditional f…
cosgroveb Mar 8, 2026
d8d1bbf
Remove phantom error status from RSpec JSON parser
cosgroveb Mar 8, 2026
642ba09
Map E status to TestStatus.ERROR in Minitest and test-unit parsers
cosgroveb Mar 9, 2026
dd91f0c
Move SWE-bench_Multilingual comment above all multilingual repos
cosgroveb Mar 9, 2026
07292f2
Walk condition field to detect || inside conditionals
cosgroveb Mar 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/bug_gen_modal.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def custom_exception_handler(loop, context):
"typescript": "TypeScriptProfile",
"golang": "GoProfile",
"go": "GoProfile",
"ruby": "RubyProfile",
"rust": "RustProfile",
"java": "JavaProfile",
"c": "CProfile",
Expand Down
59 changes: 58 additions & 1 deletion swesmith/bug_gen/adapters/ruby.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from swesmith.constants import TODO_REWRITE, CodeEntity
from swesmith.constants import TODO_REWRITE, CodeEntity, CodeProperty
from tree_sitter import Language, Parser, Query, QueryCursor
import tree_sitter_ruby as tsr
import warnings
Expand Down Expand Up @@ -100,6 +100,63 @@ def walk(node) -> int:

return 1 + walk(self.node)

def _analyze_properties(self):
    """Tag this Ruby entity with the code properties found in its AST.

    The entity's root node is tagged ``IS_FUNCTION`` when it is a
    ``method`` or ``singleton_method`` node; the subtree rooted there is
    then walked to collect the remaining property tags.
    """
    root = self.node
    is_method_node = root.type in ("method", "singleton_method")
    if is_method_node:
        self._tags.add(CodeProperty.IS_FUNCTION)
    self._walk_for_properties(root)

def _walk_for_properties(self, n):
    """Visit ``n`` and every descendant, running all property checks.

    Nodes are visited in pre-order: a node is checked before its children,
    and children are handled left to right. An explicit stack is used
    instead of recursion so that very deep trees (for example long operator
    or method-call chains) cannot exhaust Python's recursion limit.

    Args:
        n: Root of the subtree to analyze; it and each descendant must
            expose ``children``.
    """
    pending = [n]
    while pending:
        current = pending.pop()
        self._check_control_flow(current)
        self._check_operations(current)
        self._check_expressions(current)
        # Push children reversed so the leftmost child is popped first,
        # which keeps the traversal order identical to a recursive walk.
        pending.extend(reversed(current.children))

def _check_control_flow(self, n):
    """Tag conditional, loop, and exception-handling constructs.

    * ``if`` / ``unless`` (statement or modifier form) -> ``HAS_IF``;
      the statement forms additionally get ``HAS_IF_ELSE`` when one of
      their direct children is an ``else`` or ``elsif`` node.
    * ``while`` / ``until`` / ``for`` (and the modifier loops) -> ``HAS_LOOP``.
    * ``rescue`` / ``ensure`` -> ``HAS_EXCEPTION``.
    """
    kind = n.type
    if kind in ("if", "unless", "if_modifier", "unless_modifier"):
        self._tags.add(CodeProperty.HAS_IF)
        # Only the statement forms can carry an else/elsif branch.
        if kind in ("if", "unless"):
            for branch in n.children:
                if branch.type in ("else", "elsif"):
                    self._tags.add(CodeProperty.HAS_IF_ELSE)
                    break
    elif kind in ("while", "until", "for", "while_modifier", "until_modifier"):
        self._tags.add(CodeProperty.HAS_LOOP)
    elif kind in ("rescue", "ensure"):
        self._tags.add(CodeProperty.HAS_EXCEPTION)

def _check_operations(self, n):
    """Tag indexing, calls, returns, assignments, and block-like nodes.

    The node types are mutually exclusive, so at most one tag is added
    per node.
    """
    kind = n.type
    if kind in ("element_reference", "element_assignment"):
        self._tags.add(CodeProperty.HAS_LIST_INDEXING)
    elif kind == "call":
        self._tags.add(CodeProperty.HAS_FUNCTION_CALL)
    elif kind == "return":
        self._tags.add(CodeProperty.HAS_RETURN)
    elif kind in ("assignment", "operator_assignment"):
        self._tags.add(CodeProperty.HAS_ASSIGNMENT)
    elif kind in ("lambda", "block", "do_block"):
        # Lambdas and both block forms are all recorded under HAS_LAMBDA.
        self._tags.add(CodeProperty.HAS_LAMBDA)

def _check_expressions(self, n):
    """Tag binary, unary, and ternary expressions.

    For a ``binary`` node every direct child's source text is inspected:
    a boolean operator (``&&``, ``||``, ``and``, ``or``) adds
    ``HAS_BOOL_OP`` and a relational operator (``<``, ``>``, ``<=``,
    ``>=``) adds ``HAS_OFF_BY_ONE``.
    """
    kind = n.type
    if kind == "binary":
        self._tags.add(CodeProperty.HAS_BINARY_OP)
        for part in n.children:
            if not hasattr(part, "text"):
                continue
            token = part.text.decode("utf-8")
            if token in ("&&", "||", "and", "or"):
                self._tags.add(CodeProperty.HAS_BOOL_OP)
            elif token in ("<", ">", "<=", ">="):
                self._tags.add(CodeProperty.HAS_OFF_BY_ONE)
    elif kind == "unary":
        self._tags.add(CodeProperty.HAS_UNARY_OP)
    elif kind == "conditional":  # Ruby ternary: cond ? a : b
        self._tags.add(CodeProperty.HAS_TERNARY)


def get_entities_from_file_rb(
entities: list[RubyEntity],
Expand Down
2 changes: 2 additions & 0 deletions swesmith/bug_gen/procedural/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from swesmith.bug_gen.procedural.java import MODIFIERS_JAVA
from swesmith.bug_gen.procedural.javascript import MODIFIERS_JAVASCRIPT
from swesmith.bug_gen.procedural.python import MODIFIERS_PYTHON
from swesmith.bug_gen.procedural.ruby import MODIFIERS_RUBY
from swesmith.bug_gen.procedural.rust import MODIFIERS_RUST
from swesmith.bug_gen.procedural.typescript import MODIFIERS_TYPESCRIPT

Expand All @@ -25,6 +26,7 @@
".hpp": MODIFIERS_CPP,
".js": MODIFIERS_JAVASCRIPT,
".py": MODIFIERS_PYTHON,
".rb": MODIFIERS_RUBY,
".rs": MODIFIERS_RUST,
".ts": MODIFIERS_TYPESCRIPT,
".tsx": MODIFIERS_TYPESCRIPT,
Expand Down
57 changes: 57 additions & 0 deletions swesmith/bug_gen/procedural/ruby/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from swesmith.bug_gen.procedural.base import ProceduralModifier
from swesmith.bug_gen.procedural.ruby.control_flow import (
ControlIfElseInvertModifier,
ControlShuffleLinesModifier,
GuardClauseInvertModifier,
)
from swesmith.bug_gen.procedural.ruby.nil_introduction import (
BangMethodStripModifier,
NilGuardRemovalModifier,
OrDefaultRemovalModifier,
OrEqualsRemovalModifier,
PresenceStripModifier,
SafeNavigationRemovalModifier,
)
from swesmith.bug_gen.procedural.ruby.operations import (
OperationBreakChainsModifier,
OperationChangeConstantsModifier,
OperationChangeModifier,
OperationFlipOperatorModifier,
OperationSwapOperandsModifier,
)
from swesmith.bug_gen.procedural.ruby.remove import (
RemoveAssignModifier,
RemoveConditionalModifier,
RemoveLoopModifier,
RemoveRescueEnsureModifier,
)
from swesmith.bug_gen.procedural.ruby.ruby_specific import (
BlockMutationModifier,
SymbolStringSwapModifier,
)

# Registry of every Ruby procedural modifier, each instantiated with its own
# ``likelihood`` setting.
# NOTE(review): ``likelihood`` is presumably the probability consumed by
# ``ProceduralModifier.flip()`` -- confirm against the base class.
MODIFIERS_RUBY: list[ProceduralModifier] = [
    # Standard modifiers (CommonPMs)
    ControlIfElseInvertModifier(likelihood=0.75),
    ControlShuffleLinesModifier(likelihood=0.75),
    OperationChangeModifier(likelihood=0.4),
    OperationFlipOperatorModifier(likelihood=0.4),
    OperationSwapOperandsModifier(likelihood=0.4),
    OperationBreakChainsModifier(likelihood=0.3),
    OperationChangeConstantsModifier(likelihood=0.4),
    RemoveAssignModifier(likelihood=0.25),
    RemoveConditionalModifier(likelihood=0.25),
    RemoveLoopModifier(likelihood=0.25),
    # Ruby-specific
    GuardClauseInvertModifier(likelihood=0.6),
    RemoveRescueEnsureModifier(likelihood=0.4),
    BlockMutationModifier(likelihood=0.4),
    SymbolStringSwapModifier(likelihood=0.5),
    # Nil introduction
    SafeNavigationRemovalModifier(likelihood=0.5),
    OrDefaultRemovalModifier(likelihood=0.4),
    PresenceStripModifier(likelihood=0.5),
    BangMethodStripModifier(likelihood=0.4),
    OrEqualsRemovalModifier(likelihood=0.4),
    NilGuardRemovalModifier(likelihood=0.5),
]
108 changes: 108 additions & 0 deletions swesmith/bug_gen/procedural/ruby/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from abc import ABC

import tree_sitter_ruby as tsruby
from tree_sitter import Language, Parser

from swesmith.bug_gen.procedural.base import ProceduralModifier
from swesmith.constants import BugRewrite, CodeEntity

# tree-sitter grammar for Ruby, built once at import time and passed to every
# ``Parser`` created in this module.
RUBY_LANGUAGE = Language(tsruby.language())


class RubyProceduralModifier(ProceduralModifier, ABC):
    """Base class for Ruby-specific procedural modifications.

    Bundles the tree-sitter helpers that Ruby modifiers share: syntax
    validation of a rewrite, node lookup by type, byte-accurate node
    replacement, and removal of whole nodes from an entity's source.
    """

    @staticmethod
    def validate_syntax(original: str, modified: str) -> bool | None:
        """Check that ``modified`` still parses as Ruby.

        Args:
            original: Source text before the modification.
            modified: Source text after the modification.

        Returns:
            ``None`` when ``modified`` is identical to ``original`` (nothing
            changed, so there is nothing to validate), ``True`` when the
            parse tree is free of syntax errors, ``False`` otherwise.
        """
        if original == modified:
            return None
        parser = Parser(RUBY_LANGUAGE)
        tree = parser.parse(bytes(modified, "utf8"))
        # ``has_error`` is set when the subtree contains an ERROR node *or* a
        # token the parser had to insert during error recovery. Inserted
        # ("missing") tokens carry the expected token's own type, never the
        # string "MISSING", so comparing ``node.type`` alone would let them
        # slip through as valid.
        return not tree.root_node.has_error

    @staticmethod
    def find_nodes(node, *types) -> list:
        """Recursively find all AST nodes matching any of the given types.

        Matches are returned in pre-order (a node before its descendants).

        Note: tree-sitter Ruby reuses the type name for both compound
        statement nodes and their keyword tokens (e.g. ``while`` appears
        as both the loop node and its keyword child). Callers searching
        for compound statements like ``while``, ``if``, ``rescue`` etc.
        should filter out leaf nodes (``n.child_count == 0``, i.e.
        ``not n.children``) to avoid matching bare keywords.
        """
        results = []

        def walk(n):
            if n.type in types:
                results.append(n)
            for child in n.children:
                walk(child)

        walk(node)
        return results

    @staticmethod
    def replace_node(code: str, node, replacement: str) -> str:
        """Replace a tree-sitter node's text via byte offsets.

        The splice is done on the UTF-8 encoding of ``code`` because the
        node's ``start_byte`` / ``end_byte`` are byte offsets, not character
        offsets.
        """
        code_bytes = code.encode("utf8")
        new_bytes = (
            code_bytes[: node.start_byte]
            + replacement.encode("utf8")
            + code_bytes[node.end_byte :]
        )
        return new_bytes.decode("utf8")

    def _remove_matching_nodes(
        self, code_entity: CodeEntity, *node_types: str, validate: bool = False
    ) -> BugRewrite | None:
        """Remove AST nodes matching the given types from the source code.

        Args:
            code_entity: Entity whose ``src_code`` is parsed and rewritten.
            *node_types: tree-sitter node type names eligible for removal.
            validate: When true, the rewrite is discarded unless
                ``validate_syntax`` reports it as valid; when false, the
                rewrite is only discarded if it equals the original source.

        Returns:
            A ``BugRewrite`` carrying the modified source, or ``None`` when
            the initial ``self.flip()`` fails, no node was selected, or the
            result was rejected by the check described under ``validate``.
        """
        if not self.flip():
            return None

        parser = Parser(RUBY_LANGUAGE)
        tree = parser.parse(bytes(code_entity.src_code, "utf8"))

        removals = []

        def collect(n):
            # Keyword tokens (e.g. `while` inside while_modifier) are leaf
            # nodes in tree-sitter; compound statements always have children.
            if n.type in node_types and n.children and self.flip():
                removals.append(n)
                return  # skip children to avoid stale byte offsets on nested removals
            for child in n.children:
                collect(child)

        collect(tree.root_node)

        if not removals:
            return None

        source_bytes = code_entity.src_code.encode("utf8")
        # Cut from the end of the file backwards so the byte offsets of the
        # nodes still waiting to be removed remain valid.
        for node in sorted(removals, key=lambda x: x.start_byte, reverse=True):
            source_bytes = (
                source_bytes[: node.start_byte] + source_bytes[node.end_byte :]
            )

        modified_code = source_bytes.decode("utf8")

        if validate:
            # validate_syntax returns None for an unchanged source and False
            # for a broken one; both are rejected here.
            valid = self.validate_syntax(code_entity.src_code, modified_code)
            if not valid:
                return None
        elif modified_code == code_entity.src_code:
            return None

        return BugRewrite(
            rewrite=modified_code,
            explanation=self.explanation,
            strategy=self.name,
        )
Loading