Skip to content

Commit 79cc3d2

Browse files
byroot authored and hsbt committed
StringScanner#scan_integer support base 16 integers (#116)
Followup: ruby/strscan#115. `scan_integer` is now implemented in Ruby so as to efficiently handle keyword arguments without allocating a Hash. Given that the goal of `scan_integer` is to parse integers more efficiently, without having to allocate an intermediary object, using `rb_scan_args` would defeat the purpose. Additionally, the C implementation now uses `rb_isdigit` and `rb_isxdigit`, because on Windows `isdigit` is locale-dependent.
1 parent 5514485 commit 79cc3d2

File tree

4 files changed

+172
-27
lines changed

4 files changed

+172
-27
lines changed

ext/strscan/lib/strscan/strscan.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# frozen_string_literal: true
2+
3+
class StringScanner
4+
# call-seq:
5+
# scan_integer(base: 10)
6+
#
7+
# If `base` isn't provided or is `10`, then it is equivalent to calling `#scan` with a `[+-]?\d+` pattern,
8+
# and returns an Integer or nil.
9+
#
10+
# If `base` is `16`, then it is equivalent to calling `#scan` with a `[+-]?(0x)?[0-9a-fA-F]+` pattern,
11+
# and returns an Integer or nil.
12+
#
13+
# The scanned string must be encoded with an ASCII compatible encoding, otherwise
14+
# Encoding::CompatibilityError will be raised.
15+
def scan_integer(base: 10)
16+
case base
17+
when 10
18+
scan_base10_integer
19+
when 16
20+
scan_base16_integer
21+
else
22+
raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16"
23+
end
24+
end
25+
end

ext/strscan/strscan.c

Lines changed: 66 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
extern size_t onig_region_memsize(const struct re_registers *regs);
2121
#endif
2222

23-
#include <ctype.h>
2423
#include <stdbool.h>
2524

2625
#define STRSCAN_VERSION "3.1.1.dev"
@@ -116,7 +115,7 @@ static VALUE strscan_get_byte _((VALUE self));
116115
static VALUE strscan_getbyte _((VALUE self));
117116
static VALUE strscan_peek _((VALUE self, VALUE len));
118117
static VALUE strscan_peep _((VALUE self, VALUE len));
119-
static VALUE strscan_scan_integer _((VALUE self));
118+
static VALUE strscan_scan_base10_integer _((VALUE self));
120119
static VALUE strscan_unscan _((VALUE self));
121120
static VALUE strscan_bol_p _((VALUE self));
122121
static VALUE strscan_eos_p _((VALUE self));
@@ -1268,21 +1267,26 @@ strscan_peep(VALUE self, VALUE vlen)
12681267
return strscan_peek(self, vlen);
12691268
}
12701269

1271-
/*
1272-
* call-seq:
1273-
* scan_integer
1274-
*
1275-
* Equivalent to #scan with a [+-]?\d+ pattern, and returns an Integer or nil.
1276-
*
1277-
* The scanned string must be encoded with an ASCII compatible encoding, otherwise
1278-
* Encoding::CompatibilityError will be raised.
1279-
*/
12801270
static VALUE
1281-
strscan_scan_integer(VALUE self)
1271+
strscan_parse_integer(struct strscanner *p, int base, long len)
12821272
{
1283-
char *ptr, *buffer;
1284-
long len = 0;
12851273
VALUE buffer_v, integer;
1274+
1275+
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
1276+
1277+
MEMCPY(buffer, CURPTR(p), char, len);
1278+
buffer[len] = '\0';
1279+
integer = rb_cstr2inum(buffer, base);
1280+
RB_ALLOCV_END(buffer_v);
1281+
p->curr += len;
1282+
return integer;
1283+
}
1284+
1285+
static VALUE
1286+
strscan_scan_base10_integer(VALUE self)
1287+
{
1288+
char *ptr;
1289+
long len = 0;
12861290
struct strscanner *p;
12871291

12881292
GET_SCANNER(self, p);
@@ -1302,25 +1306,60 @@ strscan_scan_integer(VALUE self)
13021306
len++;
13031307
}
13041308

1305-
if (!isdigit(ptr[len])) {
1309+
if (!rb_isdigit(ptr[len])) {
13061310
return Qnil;
13071311
}
13081312

13091313
MATCHED(p);
13101314
p->prev = p->curr;
13111315

1312-
while (len < remaining_len && isdigit(ptr[len])) {
1316+
while (len < remaining_len && rb_isdigit(ptr[len])) {
13131317
len++;
13141318
}
13151319

1316-
buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
1320+
return strscan_parse_integer(p, 10, len);
1321+
}
13171322

1318-
MEMCPY(buffer, CURPTR(p), char, len);
1319-
buffer[len] = '\0';
1320-
integer = rb_cstr2inum(buffer, 10);
1321-
RB_ALLOCV_END(buffer_v);
1322-
p->curr += len;
1323-
return integer;
1323+
static VALUE
1324+
strscan_scan_base16_integer(VALUE self)
1325+
{
1326+
char *ptr;
1327+
long len = 0;
1328+
struct strscanner *p;
1329+
1330+
GET_SCANNER(self, p);
1331+
CLEAR_MATCH_STATUS(p);
1332+
1333+
rb_must_asciicompat(p->str);
1334+
1335+
ptr = CURPTR(p);
1336+
1337+
long remaining_len = S_RESTLEN(p);
1338+
1339+
if (remaining_len <= 0) {
1340+
return Qnil;
1341+
}
1342+
1343+
if (ptr[len] == '-' || ptr[len] == '+') {
1344+
len++;
1345+
}
1346+
1347+
if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') {
1348+
len += 2;
1349+
}
1350+
1351+
if (len >= remaining_len || !rb_isxdigit(ptr[len])) {
1352+
return Qnil;
1353+
}
1354+
1355+
MATCHED(p);
1356+
p->prev = p->curr;
1357+
1358+
while (len < remaining_len && rb_isxdigit(ptr[len])) {
1359+
len++;
1360+
}
1361+
1362+
return strscan_parse_integer(p, 16, len);
13241363
}
13251364

13261365
/*
@@ -2261,7 +2300,8 @@ Init_strscan(void)
22612300
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
22622301
rb_define_method(StringScanner, "peep", strscan_peep, 1);
22632302

2264-
rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);
2303+
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
2304+
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);
22652305

22662306
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
22672307

@@ -2290,4 +2330,6 @@ Init_strscan(void)
22902330
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
22912331

22922332
rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
2333+
2334+
rb_require("strscan/strscan");
22932335
}

ext/strscan/strscan.gemspec

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,17 @@ Gem::Specification.new do |s|
1919
files = [
2020
"COPYING",
2121
"LICENSE.txt",
22+
"lib/strscan/strscan.rb"
2223
]
24+
25+
s.require_paths = %w{lib}
26+
2327
if RUBY_ENGINE == "jruby"
24-
s.require_paths = %w{ext/jruby/lib lib}
25-
files << "ext/jruby/lib/strscan.rb"
2628
files << "lib/strscan.jar"
29+
files << "ext/jruby/lib/strscan.rb"
30+
s.require_paths += %w{ext/jruby/lib}
2731
s.platform = "java"
2832
else
29-
s.require_paths = %w{lib}
3033
files << "ext/strscan/extconf.rb"
3134
files << "ext/strscan/strscan.c"
3235
s.rdoc_options << "-idoc"

test/strscan/test_stringscanner.rb

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,81 @@ def test_scan_integer_encoding
945945
s.scan_integer
946946
end
947947
end
948+
949+
def test_scan_integer_base_16
950+
omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"
951+
952+
s = create_string_scanner('0')
953+
assert_equal 0x0, s.scan_integer(base: 16)
954+
assert_equal 1, s.pos
955+
assert_predicate s, :matched?
956+
957+
s = create_string_scanner('abc')
958+
assert_equal 0xabc, s.scan_integer(base: 16)
959+
assert_equal 3, s.pos
960+
assert_predicate s, :matched?
961+
962+
s = create_string_scanner('123abc')
963+
assert_equal 0x123abc, s.scan_integer(base: 16)
964+
assert_equal 6, s.pos
965+
assert_predicate s, :matched?
966+
967+
s = create_string_scanner('0x123abc')
968+
assert_equal 0x123abc, s.scan_integer(base: 16)
969+
assert_equal 8, s.pos
970+
assert_predicate s, :matched?
971+
972+
s = create_string_scanner('0x123ABC')
973+
assert_equal 0x123abc, s.scan_integer(base: 16)
974+
assert_equal 8, s.pos
975+
assert_predicate s, :matched?
976+
977+
s = create_string_scanner('-0x123ABC')
978+
assert_equal -0x123abc, s.scan_integer(base: 16)
979+
assert_equal 9, s.pos
980+
assert_predicate s, :matched?
981+
982+
s = create_string_scanner('+0x123ABC')
983+
assert_equal +0x123abc, s.scan_integer(base: 16)
984+
assert_equal 9, s.pos
985+
assert_predicate s, :matched?
986+
987+
s = create_string_scanner('0x')
988+
assert_nil s.scan_integer(base: 16)
989+
assert_equal 0, s.pos
990+
refute_predicate s, :matched?
991+
992+
s = create_string_scanner('-0x')
993+
assert_nil s.scan_integer(base: 16)
994+
assert_equal 0, s.pos
995+
refute_predicate s, :matched?
996+
997+
s = create_string_scanner('+0x')
998+
assert_nil s.scan_integer(base: 16)
999+
assert_equal 0, s.pos
1000+
refute_predicate s, :matched?
1001+
1002+
s = create_string_scanner('-123abc')
1003+
assert_equal -0x123abc, s.scan_integer(base: 16)
1004+
assert_equal 7, s.pos
1005+
assert_predicate s, :matched?
1006+
1007+
s = create_string_scanner('+123')
1008+
assert_equal 0x123, s.scan_integer(base: 16)
1009+
assert_equal 4, s.pos
1010+
assert_predicate s, :matched?
1011+
1012+
s = create_string_scanner('-abc')
1013+
assert_equal -0xabc, s.scan_integer(base: 16)
1014+
assert_equal 4, s.pos
1015+
assert_predicate s, :matched?
1016+
1017+
huge_integer = 'F' * 2_000
1018+
s = create_string_scanner(huge_integer)
1019+
assert_equal huge_integer.to_i(16), s.scan_integer(base: 16)
1020+
assert_equal 2_000, s.pos
1021+
assert_predicate s, :matched?
1022+
end
9481023
end
9491024

9501025
class TestStringScanner < Test::Unit::TestCase

0 commit comments

Comments
 (0)