@@ -203,9 +203,9 @@ public IRubyObject set_string(ThreadContext context, IRubyObject str) {
203203 }
204204
// Appends the given object to the scanner's backing string and returns the scanner itself.
// Registered for Ruby under two names: "concat" and "<<".
205205 @ JRubyMethod (name = {"concat" , "<<" })
// Review note: the parameter is renamed from obj to str, so it now shadows the
// scanner's own str member; the added body line therefore reaches the member via this.str.
206- public IRubyObject concat (ThreadContext context , IRubyObject obj ) {
206+ public IRubyObject concat (ThreadContext context , IRubyObject str ) {
// NOTE(review): check() is defined outside this hunk — presumably validates scanner state; confirm.
207207 check (context );
// Review note: the argument-to-string conversion changes from the instance call
// obj.convertToString() to the static helper RubyString.stringValue(str).
208- str .append (obj . convertToString ( ));
208+ this . str .append (RubyString . stringValue ( str ));
// Returns the receiver (this), not the appended string.
209209 return this ;
210210 }
211211
@@ -261,7 +261,7 @@ private IRubyObject extractBegLen(Ruby runtime, int beg, int len) {
261261 }
262262
263263 // MRI: strscan_do_scan
264- private IRubyObject scan (ThreadContext context , IRubyObject regex , boolean succptr , boolean getstr , boolean headonly ) {
264+ private IRubyObject scan (ThreadContext context , IRubyObject pattern , boolean succptr , boolean getstr , boolean headonly ) {
265265 final Ruby runtime = context .runtime ;
266266 check (context );
267267 clearMatchStatus ();
@@ -274,12 +274,12 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
274274 ByteList strBL = str .getByteList ();
275275 int currPtr = strBL .getBegin () + curr ;
276276
277- if (regex instanceof RubyRegexp ) {
278- pattern = ((RubyRegexp ) regex ).preparePattern (str );
277+ if (pattern instanceof RubyRegexp ) {
278+ this . pattern = ((RubyRegexp ) pattern ).preparePattern (str );
279279
280280 int range = currPtr + restLen ;
281281
282- Matcher matcher = pattern .matcher (strBL .getUnsafeBytes (), matchTarget (), range );
282+ Matcher matcher = this . pattern .matcher (strBL .getUnsafeBytes (), matchTarget (), range );
283283 final int ret ;
284284 if (headonly ) {
285285 ret = RubyRegexp .matcherMatch (context , matcher , currPtr , range , Option .NONE );
@@ -294,26 +294,28 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
294294 regs = matchRegion ;
295295 }
296296
297- if (ret == - 2 ) {
298- throw runtime . newRaiseException (( RubyClass ) getMetaClass (). getConstant ( "ScanError" ), "regexp buffer overflow" ) ;
297+ if (ret < 0 ) { // MISMATCH
298+ return context . nil ;
299299 }
300- if (ret < 0 ) return context .nil ;
301300 } else {
302- RubyString pattern = regex .convertToString ();
303- Encoding patternEnc = str .checkEncoding (pattern );
304- ByteList patternBL = pattern .getByteList ();
305- int patternSize = patternBL .realSize ();
301+ RubyString patternStr = RubyString .stringValue (pattern );
302+ ByteList patternBL = patternStr .getByteList ();
303+ final int patternSize = patternBL .realSize ();
304+
305+ if (restLen < patternSize ) {
306+ str .checkEncoding (patternStr );
307+ return context .nil ;
308+ }
306309
307310 if (headonly ) {
308- if (restLen < pattern .size ()) {
309- return context .nil ;
310- }
311+ str .checkEncoding (patternStr );
312+
311313 if (ByteList .memcmp (strBL .unsafeBytes (), currPtr , patternBL .unsafeBytes (), patternBL .begin (), patternSize ) != 0 ) {
312314 return context .nil ;
313315 }
314316 setRegisters (0 , patternSize );
315317 } else {
316- int pos = StringSupport .index (strBL , patternBL , currPtr , patternEnc );
318+ int pos = StringSupport .index (strBL , patternBL , currPtr , str . checkEncoding ( patternStr ) );
317319 if (pos == -1 ) {
318320 return context .nil ;
319321 }
@@ -705,7 +707,7 @@ public IRubyObject op_aref(ThreadContext context, IRubyObject idx) {
705707
706708 if (idx instanceof RubySymbol || idx instanceof RubyString ) {
707709 if (pattern == null ) {
708- throw runtime .newRaiseException (( RubyClass ) getMetaClass (). getConstant ( "IndexError" ), "undefined group name reference: " + idx );
710+ throw runtime .newRaiseException (runtime . getIndexError ( ), "undefined group name reference: " + idx );
709711 }
710712 }
711713
@@ -943,12 +945,25 @@ public IRubyObject values_at(ThreadContext context, IRubyObject index1, IRubyObj
943945
// Builds a RubyString covering `length` bytes of the scanner's string, beginning
// `start` bytes past the backing ByteList's begin offset.
944946 // MRI: str_new
945947 private RubyString newString (Ruby runtime , int start , int length ) {
946- ByteList byteList = str .getByteList ();
947- int begin = byteList . begin ();
// Review note: the added lines build the ByteList without an encoding argument;
// the encoding is supplied later, when the RubyString is created.
// NOTE(review): the trailing `true` is presumably the "copy bytes" flag — confirm against ByteList.
948+ final ByteList strBytes = this . str .getByteList ();
949+ ByteList newBytes = new ByteList ( strBytes . unsafeBytes (), strBytes . begin () + start , length , true );
948950
949- ByteList newByteList = new ByteList (byteList .unsafeBytes (), begin + start , length , byteList .getEncoding (), true );
// The new string takes the source string's encoding, then has code-range
// information carried over from the source via copyCodeRangeForSubstr.
951+ final RubyString newStr = RubyString .newString (runtime , newBytes , strBytes .getEncoding ());
952+ copyCodeRangeForSubstr (newStr , this .str );
953+ return newStr ;
954+ }
950955
// Removed line: the old version returned the string directly, with no code-range copy.
951- return RubyString .newString (runtime , newByteList );
956+ /**
957+ * Same as JRuby's (private) <code>RubyString#copyCodeRangeForSubstr</code>.
958+ * Isn't really necessary, but will avoid extra code-range scans for the substrings returned.
959+ */
// Parameters: str is the substring whose code range is set; from is the string it was cut from.
960+ private void copyCodeRangeForSubstr (RubyString str , RubyString from ) {
// Empty substring: set a definite code range based only on the source encoding —
// CR_7BIT when that encoding is ASCII-compatible, CR_VALID otherwise.
961+ if (str .size () == 0 ) {
962+ str .setCodeRange (from .getEncoding ().isAsciiCompatible () ? StringSupport .CR_7BIT : StringSupport .CR_VALID );
963+ } else {
// Non-empty substring: only a CR_7BIT source is propagated; any other source
// code range leaves the substring's code range untouched.
964+ if (from .getCodeRange () == StringSupport .CR_7BIT ) str .setCodeRange (StringSupport .CR_7BIT );
965+ // otherwise, leave it as CR_UNKNOWN
966+ }
952967 }
953968
954969 /**
0 commit comments