Skip to content

Commit 229eaaf

Browse files
committed
Merge pull request #21 from bearmini/master
added mutf-8 (modified utf-8) support
2 parents 1160cbf + caf92c3 commit 229eaaf

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

lib/android/dex/dex_object.rb

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,49 @@ def to_s
179179
@params[:data]
180180
end
181181
private
182+
# Decodes MUTF-8 (modified UTF-8) bytes into a UTF-8 Ruby string.
#
# data - String holding the raw bytes; it is read byte-wise, so its
#        encoding tag does not matter.
# off  - byte offset in data where the MUTF-8 payload starts.
# ulen - number of UTF-16 code units to decode (a surrogate pair counts
#        as two units).
#
# Returns a UTF-8 encoded String. Encoded NULs (0xC0 0x80) and raw zero
# bytes are consumed but not emitted. Bytes that cannot start a sequence
# are reported on STDERR and skipped without consuming a code unit. If
# data ends before ulen units were decoded, a warning is printed and the
# text decoded so far is returned. Unpaired surrogates are passed through
# unchanged.
def mutf8_to_utf8(data, off, ulen)
  mi = 0 # byte index into the MUTF-8 data, relative to off
  codepoints = []
  while ulen > 0
    pos = off + mi
    # getbyte is byte-accurate for any string encoding and returns nil
    # (instead of raising) once we run past the end of data.
    b0 = data.getbyte(pos)
    break if b0.nil?

    if (b0 & 0x80) == 0 # single byte encoding (0b0xxx_xxxx)
      c = b0
      mi += 1
    elsif (b0 & 0xe0) == 0xc0 # two-byte encoding (0b110x_xxxx)
      b1 = data.getbyte(pos + 1)
      break if b1.nil?
      c = (b0 & 0x1f) << 6 | (b1 & 0x3f)
      mi += 2
    elsif (b0 & 0xf0) == 0xe0 # three-byte encoding (0b1110_xxxx)
      b1 = data.getbyte(pos + 1)
      b2 = data.getbyte(pos + 2)
      break if b1.nil? || b2.nil?
      c = (b0 & 0x0f) << 12 | (b1 & 0x3f) << 6 | (b2 & 0x3f)
      mi += 3
      # A high surrogate is combined only when the declared length leaves
      # room for a second unit and the next three bytes really encode a
      # low surrogate (0xED, 0b1011_yyyy, continuation).
      if c >= 0xD800 && c <= 0xDBFF && ulen > 1
        b3 = data.getbyte(pos + 3)
        b4 = data.getbyte(pos + 4)
        b5 = data.getbyte(pos + 5)
        if b3 == 0xed && b4 && (b4 & 0xf0) == 0xb0 && b5
          c = 0x10000 + ((c & 0x3ff) << 10) + ((b4 & 0x0f) << 6 | (b5 & 0x3f))
          mi += 3
          ulen -= 1 # the low surrogate is a UTF-16 unit of its own
        end
      end
    else
      STDERR.puts "unsupported byte: 0x#{'%02X' % b0} @#{mi}"
      mi += 1
      next
    end

    ulen -= 1
    codepoints << c unless c == 0
  end
  # The loop only exits with units still pending when data ran out.
  STDERR.puts "truncated mutf-8 data @#{mi}" if ulen > 0
  codepoints.pack("U*")
end
182222
# Populates @params for a string data item: the UTF-16 length returned by
# read_uleb, then the MUTF-8 payload decoded to UTF-8.
def parse
  # read_uleb must run first: the payload offset below is computed from
  # @parsing_off afterwards (presumably advanced by read_uleb -- confirm
  # against its definition).
  utf16_size = read_uleb
  @params[:utf16_size] = utf16_size
  payload_off = @offset + @parsing_off
  @params[:data] = mutf8_to_utf8(@data, payload_off, utf16_size)
end
186226
end
187227

spec/dex/dex_object_spec.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,17 @@
102102
it { should match(/\A<Android::Dex::DexObject::Header.*>\Z/m) }
103103
end
104104
end
105+
106+
# Checks that StringDataItem#to_s returns the MUTF-8 payload decoded to UTF-8.
describe Android::Dex::DexObject::StringDataItem do
  # Binary fixture: a leading 0x0b (11) byte, then "abc", an encoded NUL
  # (C0 80), a two-byte character (C8 85 = U+0205), three three-byte
  # characters (U+3042, U+3044, U+3046), a surrogate pair
  # (ED A0 81 ED B0 80 = U+10400) and a trailing encoded NUL.
  let(:string_data_item_sample) do
    "\x0b\x61\x62\x63\xc0\x80\xc8\x85\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xed\xa0\x81\xed\xb0\x80\xc0\x80".force_encoding(Encoding::ASCII_8BIT)
  end

  let(:string_data_item) do
    Android::Dex::DexObject::StringDataItem.new(string_data_item_sample, 0)
  end

  describe "#to_s" do
    subject { string_data_item.to_s }

    # The encoded NULs are not expected in the decoded text.
    it { should == "abc\u{205}\u{3042}\u{3044}\u{3046}\u{10400}" }
  end
end
117+
105118
end

0 commit comments

Comments
 (0)