diff --git a/core/matchdata/bytebegin_spec.rb b/core/matchdata/bytebegin_spec.rb new file mode 100644 index 0000000000..08c1fd6d1e --- /dev/null +++ b/core/matchdata/bytebegin_spec.rb @@ -0,0 +1,132 @@ +require_relative '../../spec_helper' + +ruby_version_is "3.4" do + describe "MatchData#bytebegin" do + context "when passed an integer argument" do + it "returns the byte-based offset of the start of the nth element" do + match_data = /(.)(.)(\d+)(\d)/.match("THX1138.") + match_data.bytebegin(0).should == 1 + match_data.bytebegin(2).should == 2 + end + + it "returns nil when the nth match isn't found" do + match_data = /something is( not)? (right)/.match("something is right") + match_data.bytebegin(1).should be_nil + end + + it "returns the byte-based offset for multi-byte strings" do + match_data = /(.)(.)(\d+)(\d)/.match("TñX1138.") + match_data.bytebegin(0).should == 1 + match_data.bytebegin(2).should == 3 + end + + not_supported_on :opal do + it "returns the byte-based offset for multi-byte strings with unicode regexp" do + match_data = /(.)(.)(\d+)(\d)/u.match("TñX1138.") + match_data.bytebegin(0).should == 1 + match_data.bytebegin(2).should == 3 + end + end + + it "tries to convert the passed argument to an Integer using #to_int" do + obj = mock('to_int') + obj.should_receive(:to_int).and_return(2) + + match_data = /(.)(.)(\d+)(\d)/.match("THX1138.") + match_data.bytebegin(obj).should == 2 + end + + it "raises IndexError if index is out of bounds" do + match_data = /(?foo)(?bar)/.match("foobar") + + -> { + match_data.bytebegin(-1) + }.should raise_error(IndexError, "index -1 out of matches") + + -> { + match_data.bytebegin(3) + }.should raise_error(IndexError, "index 3 out of matches") + end + end + + context "when passed a String argument" do + it "return the byte-based offset of the start of the named capture" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.bytebegin("a").should == 1 + match_data.bytebegin("b").should == 3 + end + + it "returns the byte-based offset for multi byte strings" do + match_data = /(?.)(.)(?\d+)(\d)/.match("TñX1138.") + match_data.bytebegin("a").should == 1 + match_data.bytebegin("b").should == 4 + end + + not_supported_on :opal do + it "returns the byte-based offset for multi byte strings with unicode regexp" do + match_data = /(?.)(.)(?\d+)(\d)/u.match("TñX1138.") + match_data.bytebegin("a").should == 1 + match_data.bytebegin("b").should == 4 + end + end + + it "returns the byte-based offset for the farthest match when multiple named captures use the same name" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.bytebegin("a").should == 3 + end + + it "returns the byte-based offset for multi-byte names" do + match_data = /(?<æ>.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.bytebegin("æ").should == 1 + end + + it "raises IndexError if there is no group with the provided name" do + match_data = /(?foo)(?bar)/.match("foobar") + + -> { + match_data.bytebegin("y") + }.should raise_error(IndexError, "undefined group name reference: y") + end + end + + context "when passed a Symbol argument" do + it "return the byte-based offset of the start of the named capture" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.bytebegin(:a).should == 1 + match_data.bytebegin(:b).should == 3 + end + + it "returns the byte-based offset for multi byte strings" do + match_data = /(?.)(.)(?\d+)(\d)/.match("TñX1138.") + match_data.bytebegin(:a).should == 1 + match_data.bytebegin(:b).should == 4 + end + + not_supported_on :opal do + it "returns the byte-based offset for multi byte strings with unicode regexp" do + match_data = /(?.)(.)(?\d+)(\d)/u.match("TñX1138.") + match_data.bytebegin(:a).should == 1 + match_data.bytebegin(:b).should == 4 + end + end + + it "returns the byte-based offset for the farthest match when multiple named captures use the same name" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.bytebegin(:a).should == 3 + end + + it "returns the byte-based offset for multi-byte names" do + match_data = /(?<æ>.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.bytebegin(:æ).should == 1 + end + + it "raises IndexError if there is no group with the provided name" do + match_data = /(?foo)(?bar)/.match("foobar") + + -> { + match_data.bytebegin(:y) + }.should raise_error(IndexError, "undefined group name reference: y") + end + end + end +end diff --git a/core/matchdata/byteend_spec.rb b/core/matchdata/byteend_spec.rb new file mode 100644 index 0000000000..98015e287d --- /dev/null +++ b/core/matchdata/byteend_spec.rb @@ -0,0 +1,104 @@ +require_relative '../../spec_helper' + +ruby_version_is "3.4" do + describe "MatchData#byteend" do + context "when passed an integer argument" do + it "returns the byte-based offset of the end of the nth element" do + match_data = /(.)(.)(\d+)(\d)/.match("THX1138.") + match_data.byteend(0).should == 7 + match_data.byteend(2).should == 3 + end + + it "returns nil when the nth match isn't found" do + match_data = /something is( not)? (right)/.match("something is right") + match_data.byteend(1).should be_nil + end + + it "returns the byte-based offset for multi-byte strings" do + match_data = /(.)(.)(\d+)(\d)/.match("TñX1138.") + match_data.byteend(0).should == 8 + match_data.byteend(2).should == 4 + end + + not_supported_on :opal do + it "returns the byte-based offset for multi-byte strings with unicode regexp" do + match_data = /(.)(.)(\d+)(\d)/u.match("TñX1138.") + match_data.byteend(0).should == 8 + match_data.byteend(2).should == 4 + end + end + + it "tries to convert the passed argument to an Integer using #to_int" do + obj = mock('to_int') + obj.should_receive(:to_int).and_return(2) + + match_data = /(.)(.)(\d+)(\d)/.match("THX1138.") + match_data.byteend(obj).should == 3 + end + end + + context "when passed a String argument" do + it "return the byte-based offset of the start of the named capture" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.byteend("a").should == 2 + match_data.byteend("b").should == 6 + end + + it "returns the byte-based offset for multi byte strings" do + match_data = /(?.)(.)(?\d+)(\d)/.match("TñX1138.") + match_data.byteend("a").should == 3 + match_data.byteend("b").should == 7 + end + + not_supported_on :opal do + it "returns the byte-based offset for multi byte strings with unicode regexp" do + match_data = /(?.)(.)(?\d+)(\d)/u.match("TñX1138.") + match_data.byteend("a").should == 3 + match_data.byteend("b").should == 7 + end + end + + it "returns the byte-based offset for the farthest match when multiple named captures use the same name" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.byteend("a").should == 6 + end + + it "returns the byte-based offset for multi-byte names" do + match_data = /(?<æ>.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.byteend("æ").should == 2 + end + end + + context "when passed a Symbol argument" do + it "return the byte-based offset of the start of the named capture" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.byteend(:a).should == 2 + match_data.byteend(:b).should == 6 + end + + it "returns the byte-based offset for multi byte strings" do + match_data = /(?.)(.)(?\d+)(\d)/.match("TñX1138.") + match_data.byteend(:a).should == 3 + match_data.byteend(:b).should == 7 + end + + not_supported_on :opal do + it "returns the byte-based offset for multi byte strings with unicode regexp" do + match_data = /(?.)(.)(?\d+)(\d)/u.match("TñX1138.") + match_data.byteend(:a).should == 3 + match_data.byteend(:b).should == 7 + end + end + + it "returns the byte-based offset for the farthest match when multiple named captures use the same name" do + match_data = /(?.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.byteend(:a).should == 6 + end + + it "returns the byte-based offset for multi-byte names" do + match_data = /(?<æ>.)(.)(?\d+)(\d)/.match("THX1138.") + match_data.byteend(:æ).should == 2 + end + end + end +end