Skip to content

Commit 6306a0c

Browse files
committed
fix(objc): add support for reading UTF-16 encoded CFStrings #78
1 parent 54c7a1d commit 6306a0c

File tree

3 files changed

+42
-2
lines changed

3 files changed

+42
-2
lines changed

file.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"strings"
1717
"sync"
1818
"unicode"
19+
"unicode/utf16"
1920

2021
"github.com/blacktop/go-dwarf"
2122

@@ -1805,6 +1806,23 @@ func (f *File) GetCString(addr uint64) (string, error) {
18051806
return "", fmt.Errorf("%w at address %#x", ErrCStringNoTerminator, addr)
18061807
}
18071808

1809+
// getUTF16String reads a UTF-16LE encoded string at a given virtual address.
1810+
// charCount is the number of UTF-16 code units (not bytes).
1811+
func (f *File) getUTF16String(addr, charCount uint64) (string, error) {
1812+
if charCount == 0 {
1813+
return "", nil
1814+
}
1815+
buf := make([]byte, charCount*2)
1816+
if _, err := f.cr.ReadAtAddr(buf, addr); err != nil {
1817+
return "", fmt.Errorf("failed to read UTF-16 string at address %#x: %w", addr, err)
1818+
}
1819+
codes := make([]uint16, charCount)
1820+
for i := range codes {
1821+
codes[i] = f.ByteOrder.Uint16(buf[i*2:])
1822+
}
1823+
return string(utf16.Decode(codes)), nil
1824+
}
1825+
18081826
func (f *File) GetCStrings() (map[string]map[string]uint64, error) {
18091827
strs := make(map[string]map[string]uint64)
18101828

objc.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,9 +1645,14 @@ func (f *File) GetCFStrings() ([]objc.CFString, error) {
16451645
// return nullptr;
16461646
// cfs_characters = n_value;
16471647
}
1648-
cfstrings[idx].Name, err = f.GetCString(cfstrings[idx].Data)
1648+
// Check encoding from Info field and use appropriate string reader
1649+
if cfstrings[idx].CFString64Type.IsUTF16() {
1650+
cfstrings[idx].Name, err = f.getUTF16String(cfstrings[idx].Data, cfstrings[idx].Length)
1651+
} else {
1652+
cfstrings[idx].Name, err = f.GetCString(cfstrings[idx].Data)
1653+
}
16491654
if err != nil {
1650-
return nil, fmt.Errorf("failed to read cstring: %v", err)
1655+
return nil, fmt.Errorf("failed to read cfstring: %v", err)
16511656
}
16521657
if c, ok := f.objc[cfstrings[idx].IsaVMAddr]; ok {
16531658
cfstrings[idx].Class = c.(*objc.Class)

types/objc/objc.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,23 @@ type CFString64Type struct {
404404
Length uint64 // number of non-NULL characters in above
405405
}
406406

407+
// CFString encoding constants (derived from Info field).
//
// The mask must keep bit 3 because CFStringEncodingASCII (0x07C8) has it set.
// A narrower mask such as 0x0FF0 reduces 0x07C8 to 0x07C0, so a masked Info
// value could never compare equal to CFStringEncodingASCII.
const (
	CFStringEncodingMask    = 0x0FF8 // mask to extract encoding from Info field
	CFStringEncodingASCII   = 0x07C8 // UTF-8/ASCII encoding
	CFStringEncodingUnicode = 0x07D0 // UTF-16LE encoding
)
413+
414+
// Encoding returns the string encoding type from the CFString Info field
415+
func (c CFString64Type) Encoding() uint64 {
416+
return c.Info & CFStringEncodingMask
417+
}
418+
419+
// IsUTF16 returns true if the CFString uses UTF-16 encoding
420+
func (c CFString64Type) IsUTF16() bool {
421+
return c.Encoding() == CFStringEncodingUnicode
422+
}
423+
407424
const (
408425
FAST_IS_SWIFT_LEGACY = 1 << 0 // < 5
409426
FAST_IS_SWIFT_STABLE = 1 << 1 // 5.X

0 commit comments

Comments
 (0)