From a68bbf2b8f5b8bc880bb7ed12518de03baef6819 Mon Sep 17 00:00:00 2001 From: nganhkhoa Date: Wed, 10 Jan 2024 14:19:59 +0700 Subject: [PATCH] erase objc method names --- .../internal/wrapper/action/save_imports.go | 12 ++- macho-go/internal/wrapper/cli.go | 15 +++ macho-go/pkg/ios/macho/dyld_info.go | 6 +- macho-go/pkg/ios/macho/edit.go | 30 +++++- macho-go/pkg/ios/macho/objc.go | 95 +++++++++++++++++++ macho-go/proto/macho_info.proto | 8 +- research/custom_loader/a.mm | 12 ++- research/custom_loader/b.cc | 43 +++++++++ research/custom_loader/build.sh | 4 +- 9 files changed, 212 insertions(+), 13 deletions(-) create mode 100644 macho-go/pkg/ios/macho/objc.go diff --git a/macho-go/internal/wrapper/action/save_imports.go b/macho-go/internal/wrapper/action/save_imports.go index 245906d..efa7b58 100644 --- a/macho-go/internal/wrapper/action/save_imports.go +++ b/macho-go/internal/wrapper/action/save_imports.go @@ -68,7 +68,7 @@ func (action *saveImports) saveToInfo(mf *MachoFile) error { // now we expect everything is sorted and easier to build strings tables // this is not fully optimized, there can be repeated symbol name in different libraries for _, symbol := range symbols_raw { - if symbol.Type() != "lazy" { + if !symbol.SafeForRemoval() { continue } @@ -138,6 +138,16 @@ func (action *saveImports) saveToInfo(mf *MachoFile) error { } mf.Info().Main = mc.Main() + + selectors_list := []*protomodel.MachoInfo_Selector{} + for _, sel := range mc.CollectSpecialSelectors() { + selectors_list = append(selectors_list, &protomodel.MachoInfo_Selector{ + Idx: uint32(sel.Idx()), + Name: sel.Name(), + }) + } + mf.Info().SpecialSelectors = selectors_list + return nil } diff --git a/macho-go/internal/wrapper/cli.go b/macho-go/internal/wrapper/cli.go index ca198ae..905c1c2 100644 --- a/macho-go/internal/wrapper/cli.go +++ b/macho-go/internal/wrapper/cli.go @@ -299,6 +299,21 @@ func bcell2header(bfile string, header string) { } fmt.Fprintf(w, "};\n") fmt.Fprintf(w, "uint32_t n_instructions = %d;\n", n_instructions) + + fmt.Fprintf(w, "__attribute__((section(\"__DATA,bshield\")))\n") + fmt.Fprintf(w, "uint32_t special_selectors_idx[] = {\n") + for _, selector := range info.GetSpecialSelectors() { + fmt.Fprintf(w, "%x,\n", selector.Idx) + } + fmt.Fprintf(w, "};\n") + + fmt.Fprintf(w, "__attribute__((section(\"__DATA,bshield\")))\n") + fmt.Fprintf(w, "char* special_selectors_name[] = {\n") + for _, selector := range info.GetSpecialSelectors() { + fmt.Fprintf(w, "\"%s\",\n", selector.Name) + } + fmt.Fprintf(w, "};\n") + fmt.Fprintf(w, "uint32_t n_selectors = %d;\n", len(info.GetSpecialSelectors())) } fmt.Fprintf(w, "}// namespace bshield_data\n") w.Flush() diff --git a/macho-go/pkg/ios/macho/dyld_info.go b/macho-go/pkg/ios/macho/dyld_info.go index 423afa5..21dd92c 100644 --- a/macho-go/pkg/ios/macho/dyld_info.go +++ b/macho-go/pkg/ios/macho/dyld_info.go @@ -42,6 +42,10 @@ func (sym *ImportSymbol) Type() string { return sym.typ } +func (sym *ImportSymbol) SafeForRemoval() bool { + return sym.typ == "lazy" || sym.typ == "fixups" +} + func (sym *ImportSymbol) Dylib() string { return sym.dylib } @@ -150,7 +154,7 @@ func (mc *MachoContext) CollectBindSymbolsModern() []*ImportSymbol { sym.address = uint64(address) sym.name = name sym.dylib = dylib - sym.typ = "lazy" + sym.typ = "fixups" sym.lib_ordinal = uint32(s.lib_ordinal) sym.segment = uint32(mc.findSegmentIndexAt(uint64(address))) diff --git a/macho-go/pkg/ios/macho/edit.go b/macho-go/pkg/ios/macho/edit.go index eac5dca..cc727bb 100644 --- a/macho-go/pkg/ios/macho/edit.go +++ b/macho-go/pkg/ios/macho/edit.go @@ -280,7 +280,10 @@ func (mc *MachoContext) RemoveBindSymbols() { rand.Seed(time.Now().UnixNano()) - if mc.dyldinfo == nil { + isModernSymbol := mc.dyldinfo == nil + isLegacySymbol := !isModernSymbol + + if isModernSymbol { mc.removeBindSymbolsModern() } else { mc.removeBindSymbolsLegacy() @@ -290,14 +293,12 @@ func (mc *MachoContext) RemoveBindSymbols() { mc.ReworkForObjc() } - // due to some limitations when design this tool - // we write the c code to stdout lol for _, symbol := range mc.CollectBindSymbols() { - if symbol.Type() != "lazy" { + if !symbol.SafeForRemoval() { continue } - if mc.dyldinfo != nil { + if isLegacySymbol { // for legacy resolve the opcodes can be rewritten as 0x00 mc.file.WriteAt(make([]byte, 8), int64(symbol.file_address)) } else { @@ -394,6 +395,16 @@ func (mc *MachoContext) ReworkForObjc() { // edit flags to not S_MOD_INIT_FUNC mc.file.WriteAt([]byte{0, 0, 0, 0}, section_ptr+0x40) } + + // erases all objc method names + // this should still works because the cache inserts the pointer value not string + // but some symbols relies on pre-defined implementations, such as **load** method + // load method is the same across all classes and so objc define an implementation + // selector should points to this load selector to make objc thinks that it's "load" + if bytes.Compare(bytes.Trim(section.SectName(), "\x00"), []byte("__objc_methname")) == 0 { + // mc.file.WriteAt([]byte("__objc_methbruh"), section_ptr) + mc.file.WriteAt(make([]byte, section.Size()), int64(section.Offset())) + } section_ptr += 16*2 + 8*2 + 4*8 } } @@ -420,6 +431,15 @@ func (mc *MachoContext) ReworkForObjc() { sections := segment.Sections() last := sections[len(sections)-1] data_end = int(last.Addr() - segment.Vmaddr() + segment.Fileoff() + last.Size()) + + // do not register selector and see what happens + section_ptr := ptr + 0x40 + 8 + for _, section := range segment.Sections() { + if bytes.Compare(bytes.Trim(section.SectName(), "\x00"), []byte("__objc_selrefs")) == 0 { + // mc.file.WriteAt([]byte("__objc_selbruh"), section_ptr) + } + section_ptr += 16*2 + 8*2 + 4*8 + } } ptr += int64(cmd.Cmdsize()) } diff --git a/macho-go/pkg/ios/macho/objc.go b/macho-go/pkg/ios/macho/objc.go new file mode 100644 index 0000000..9e2683e --- /dev/null +++ b/macho-go/pkg/ios/macho/objc.go @@ -0,0 +1,95 @@ +package macho + +import ( + "bytes" + "encoding/binary" + "strings" + + . "ios-wrapper/pkg/ios" +) + + +type SpecialSelector struct { + idx uint + name string +} + +func (sel *SpecialSelector) Idx() uint { + return sel.idx +} + +func (sel *SpecialSelector) Name() string { + return sel.name +} + +// collect the index and the name in selector list of special method names +// these names are resolved by the dyld objc cache +// through __dyld_get_objc_selector +// +// we currently have the following symbols guaranteed to be in this list: +// - load +// - retain +func (mc *MachoContext) CollectSpecialSelectors() []*SpecialSelector { + + var special_selectors []*SpecialSelector + var methods []byte + var methname_offset uint32 + + for _, cmd := range mc.commands { + if cmd.Cmd() == LC_MAIN { + continue + } + if cmd.Cmd() != LC_SEGMENT_64 { + continue + } + var segment = cmd.(*Segment64) + + if bytes.Compare(bytes.Trim(segment.SegName(), "\x00"), []byte("__TEXT")) == 0 { + for _, section := range segment.Sections() { + if bytes.Compare(bytes.Trim(section.SectName(), "\x00"), []byte("__objc_methname")) == 0 { + methname_offset = section.Offset() + methods = make([]byte, section.Size()) + mc.file.ReadAt(methods, int64(section.Offset())) + } + } + } + if bytes.Compare(bytes.Trim(segment.SegName(), "\x00"), []byte("__DATA")) == 0 { + for _, section := range segment.Sections() { + if bytes.Compare(bytes.Trim(section.SectName(), "\x00"), []byte("__objc_selrefs")) == 0 { + selectors_buffer := make([]byte, section.Size()) + mc.file.ReadAt(selectors_buffer, int64(section.Offset())) + + buffer := bytes.NewReader(selectors_buffer) + + for i := uint(0); i < uint(section.Size()) / 8; i++ { + // this field is actually a Rebase + // we assume that no rebase is needed + // so everything sticks to its file offset + var offset uint32 + binary.Read(buffer, mc.byteorder, &offset) // first 4 bytes is offset + + var name_builder strings.Builder + for j := uint32(0); ; j++ { + c := methods[offset - methname_offset + j] + if c == 0 { + break + } + name_builder.WriteByte(c) + } + name := name_builder.String() + if name == "load" { + special_selectors = append(special_selectors, &SpecialSelector{ + idx: i, + name: name, + }) + } + + binary.Read(buffer, mc.byteorder, &offset) // ignore rebase arguments + } + } + } + } + } + return special_selectors +} + diff --git a/macho-go/proto/macho_info.proto b/macho-go/proto/macho_info.proto index a2aecdb..dac9955 100644 --- a/macho-go/proto/macho_info.proto +++ b/macho-go/proto/macho_info.proto @@ -44,11 +44,15 @@ message MachoInfo { repeated LibraryImportedSymbols tables = 3; } + message Selector { + uint32 idx = 1; + string name = 2; + } + PointerSize pointer_size = 1; uint64 image_base = 2; uint64 main = 3; repeated InitPointer init_pointers = 4; - // repeated BindSymbol symbols = 5; AllImportedSymbols symbols = 5; - + repeated Selector special_selectors = 6; } diff --git a/research/custom_loader/a.mm b/research/custom_loader/a.mm index a594fd5..dfdc845 100644 --- a/research/custom_loader/a.mm +++ b/research/custom_loader/a.mm @@ -1,4 +1,5 @@ #import +#include #include @interface Foo : NSObject @@ -6,7 +7,7 @@ @implementation Foo - (void)bar { - NSLog(@"%@", self); + NSLog(@"[Foo bar]: %@", self); } @end @@ -44,9 +45,16 @@ hmmge(int argc, char** argv) { int main(int argc, const char * argv[]) { @autoreleasepool { - NSLog(@"Hello, World!"); + NSLog(@"main()"); + NSLog(@"selector for \"bar:\" %p", @selector(bar:)); + Foo *foo = [[Foo alloc] init]; [foo bar]; + + NSLog(@"directly call \"bar\" %p through objc_msgSend %p with object foo %p\n", @selector(bar), objc_msgSend, foo); + typedef void (*barfunc)(id, SEL); + barfunc bar_ = &objc_msgSend; + bar_(foo, @selector(bar)); } printf("argc=%d\n", argc); diff --git a/research/custom_loader/b.cc b/research/custom_loader/b.cc index 42cf463..e2e8013 100644 --- a/research/custom_loader/b.cc +++ b/research/custom_loader/b.cc @@ -1279,6 +1279,7 @@ void fix_objc(struct libcache_item *libfixing, struct libcache &cache) { // "mov rcx, 123;" // "call r12;"); + printf("fixing objective-c\n"); void *header = libfixing->header; const uint32_t magic = *(uint32_t *)header; char *ptr = (char *)header; @@ -1305,6 +1306,48 @@ void fix_objc(struct libcache_item *libfixing, struct libcache &cache) { printf("segment %s\n", name); if (custom_strcmp(name, "__TEXT") == 0) { slide = (uint64_t)header - vmaddr; + + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + printf("section %s\n", secname); + if (custom_strncmp(secname, "__objc_methname", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint64_t *data_ptr = (uint64_t *)(addr + slide); + // printf("methname addr %p : %s\n", data_ptr, (char*)data_ptr); + break; + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } else if (custom_strcmp(name, "__DATA") == 0) { + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + printf("section %s\n", secname); + if (custom_strncmp(secname, "__objc_selrefs", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint64_t *data_ptr = (uint64_t *)(addr + slide); + + uint32_t trie_size; + void* libdyld = cache.libdyld; + void *libdyld_export_trie = get_export_trie(libdyld, trie_size); + typedef void *(*dyld_get_objc_selector_t)(const char*); + dyld_get_objc_selector_t dyld_get_objc_selector_func = (dyld_get_objc_selector_t)find_in_export_trie( + libdyld, libdyld_export_trie, "__dyld_get_objc_selector"); + + // resolve method names that cached in the dyld + for (int i = 0; i < bshield_data::n_selectors; i++) { + uint32_t idx = bshield_data::special_selectors_idx[i]; + char* name = bshield_data::special_selectors_name[i]; + data_ptr[idx] = (uint64_t)dyld_get_objc_selector_func(name); + } + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } } else if (custom_strcmp(name, "__DATA_CONST") == 0) { uint64_t nsect = *((uint32_t *)ptr + 8 * 2); char *sections_ptr = (char *)((uint32_t *)ptr + 18); diff --git a/research/custom_loader/build.sh b/research/custom_loader/build.sh index 0c81321..f82064b 100755 --- a/research/custom_loader/build.sh +++ b/research/custom_loader/build.sh @@ -64,11 +64,11 @@ clang -fobjc-arc -ObjC -mmacosx-version-min=$VERSION -o $OUT/a -L"./out" -lb a.m # extract symbols from a # ../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports --remove-symbol-table --keep-imports _printf $OUT/a -../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports $OUT/a +../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports --remove-symbol-table --remove-others $OUT/a ../../macho-go/bin/ios-wrapper bcell2header -b $OUT/b.bcell -o $OUT/b.h # build libb with symbols extracted from a clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_library out/libc.dylib b.cc -# ../../macho-go/bin/ios-wrapper pepe -o $OUT/libb.dylib -b $OUT/libb.bcell --remove-imports --remove-exports --keep-imports _dyld_get_sdk_version --keep-imports _malloc --keep-imports _printf --keep-imports ___stack_chk_guard $OUT/libb.dylib +../../macho-go/bin/ios-wrapper pepe -o $OUT/libb.dylib -b $OUT/libb.bcell --remove-imports --remove-exports --remove-symbol-table --remove-others --keep-imports _dyld_get_sdk_version --keep-imports _malloc --keep-imports ___stack_chk_guard --keep-imports _printf $OUT/libb.dylib # resign codesign --force --deep -s - $OUT/a-fixed