diff --git a/research/custom_loader/a.mm b/research/custom_loader/a.mm
index 029b87e..914528c 100644
--- a/research/custom_loader/a.mm
+++ b/research/custom_loader/a.mm
@@ -1,8 +1,34 @@
 #import <Foundation/Foundation.h>
 
-int main(int argc, const char * argv[]) {
-    @autoreleasepool {
-        NSLog(@"Hello, World!");
-    }
-    return 0;
+// Foo: ordinary class, used from main to exercise alloc/init and a message send.
+@interface Foo : NSObject
+@end
+
+@implementation Foo
+- (void)bar {
+    NSLog(@"%@", self);
+}
+@end
+
+// Bar: implements +load, which has to run before main.
+@interface Bar : NSObject
+@end
+
+@implementation Bar
++ (void)load {
+    NSLog(@"%@", self);
+}
+@end
+
+// Baz: no @interface; the superclass is named on the @implementation itself.
+@implementation Baz : Bar
+@end
+
+int main(int argc, const char * argv[]) {
+    @autoreleasepool {
+        NSLog(@"Hello, World!");
+        Foo *foo = [[Foo alloc] init];
+        [foo bar];
+    }
+    return 0;
 }
diff --git a/research/custom_loader/b.cc b/research/custom_loader/b.cc
index 6f47ffb..515e469 100644
--- a/research/custom_loader/b.cc
+++ b/research/custom_loader/b.cc
@@ -779,6 +779,7 @@ void build_cache(struct libcache& cache, void* main) {
   }
 }
 
+void fix_objc(struct libcache_item* libfixing, struct libcache& cache);
 void fix(struct libcache& cache) {
   // now we have function to find exported symbols
   // it supports full name search or hash search
@@ -812,6 +813,58 @@ void fix(struct libcache& cache) {
   //
   // just an idea, if we can somehow reduce the datasize then it would be better
 
+  // OBJC:
+  // In Objective-C, the binary is loaded with the Objective-C runtime
+  // This runtime (a library) installs a hook in dyld for all images
+  // And because this runtime is a system runtime, the bootstrap step is already prepared
+  // The details of this runtime will be in a separate document, below are some basics
+  //
+  // The compiler for Objective-C emits a bunch of details for the runtime in the binary itself
+  // This information is stored in sections whose names start with __objc, namely
+  // - __objc_classlist
+  // - __objc_classrefs
+  // - __objc_selrefs
+  // - __objc_const
+  // - __objc_data
+  //
+  // Objective-C stores the class interface in the binary, particularly in __objc_data
+  // This interface contains the superclass, metaclass, and a cache of method pointers
+  // These values are either bound (by dyld) or built (by the Objective-C runtime)
+  //
+  // One of the important routines in the Objective-C runtime is readClass.
+  // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3385
+  //
+  // This function is not exported, however there is an entry in the symtab.
+  // By using this, we can find its address
+  //
+  // Because __objc_data contains to-be-bound values,
+  // which will be resolved by dyld and referenced by the Objective-C runtime later,
+  // if we simply erase these values, the references read by the Objective-C runtime cause a crash
+  // (through debugging, we know that the crash happens in readClass, realizeClassWithoutSwift)
+  //
+  // However, we can evade this by making the runtime think there is no class that needs setup
+  // This can be done by changing __objc_classlist to some other name or by removing this section
+  // Because the runtime finds the __objc_classlist section by name, and the size of the section
+  // is used to iterate through pointers.
+  // So if we change the name, the runtime will have no class to run setup.
+  // Or remove it completely and run the setup ourselves, because we know where the data is
+  //
+  // The setup is done through the readClass function; as said above, its address can be found
+  // This function is a plain function (not a method), so calling into it is easy
+  //
+  // Important functions with their names:
+  // _readClass(objc_class*, bool, bool)
+  // mangled: __ZL9readClassP10objc_classbb
+  //
+  // _realizeClassWithoutSwift(objc_class*, objc_class*)
+  // mangled: __ZL24realizeClassWithoutSwiftP10objc_classS0_
+  //
+  // _remapClass(objc_class*)
+  // mangled: __ZL10remapClassP10objc_class
+  //
+  // _addClassTableEntry(objc_class*, bool)
+  // mangled: __ZL18addClassTableEntryP10objc_classb
+
   // NOTES:
   // mach_task_self() has a conflicting symbol or something,
   // in symbol table it's: _mach_task_self_
@@ -826,8 +879,6 @@ void fix(struct libcache& cache) {
   uint32_t libsystem_hash =
       calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib");
 
-  // this is just for now,
-  // in the future we will self fix our lib so our lib also has no import
   typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int);
   typedef void *(*mach_task_self_t)();
   mach_task_self_t mach_task_self_func =
@@ -845,7 +896,6 @@ void fix(struct libcache& cache) {
     printf("there is no imports to fix\n");
   }
   struct libcache_item* libfixing = get_libcache_with_name(&cache, lib_to_resolve);
-  // print_macho_summary(libfixing->header);
   for (int i = 0; i < nimports; i++) {
     struct imported_symbol symbol = imported_table[i];
     uint64_t fix_at = symbol.offset + libfixing->segment[symbol.segment_i];
@@ -886,6 +936,195 @@ void fix(struct libcache& cache) {
   //   uint64_t start_page = page_rw_fixed[j];
   //   vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, VM_PROT_READ);
   // }
+
+  fix_objc(libfixing, cache);
+}
+
+// Manually runs, for the image described by libfixing, the class setup that
+// the Objective-C runtime would otherwise do for a loaded image.
+//
+// Walks the Mach-O load commands of libfixing->header, takes the slide from
+// the __TEXT segment and scans the sections of __DATA_CONST for:
+//   - "__objc_classbruh": array of class pointers; each entry is handed to
+//     readClass() and realizeClassWithoutSwift().
+//   - "__objc_nlclsbruh": array of class pointers; each entry is remapped,
+//     added to the class table and realized, then scheduled for +load, and
+//     finally the collected +load methods are called.
+// NOTE(review): these look like renamed __objc_classlist / __objc_nlclslist
+// sections (see the OBJC notes in fix()) -- confirm against the patcher.
+//
+// libfixing: cache entry of the image to set up (only ->header is read).
+// cache:     library cache searched by find_in_symtab() for libobjc internals.
+//
+// Only LC_SEGMENT_64 commands are inspected.
+void fix_objc(struct libcache_item* libfixing, struct libcache& cache) {
+  // use the snippet below to call class method
+  // because often the this pointer is stored in a different register
+  // so need to load that register in before calling the function
+  //
+  // void* foo = (void*)function_to_call;
+  // asm("movq %0, %%r12"::"r"(foo));
+  // __asm__(".intel_syntax noprefix;"
+  //         "mov rcx, 123;"
+  //         "call r12;");
+
+  typedef void *(*readClass_t)(void *, bool, bool);
+  typedef void *(*realizeClassWithoutSwift_t)(void *, void*);
+  typedef void *(*remapClass_t)(void *);
+  typedef void *(*load_method_t)(void*, void*);
+  typedef void *(*sel_lookUpByName_t)(const char*);
+  // addClassTableEntry(objc_class*, bool): a raw function pointer gets no
+  // default argument, so the bool has to be part of the signature.
+  typedef void (*addClassTableEntry_t)(void *, bool);
+  typedef void (*schedule_class_load_t)(void *);
+
+  typedef void *(*objc_autoreleasePoolPush_t)();
+  typedef void (*objc_autoreleasePoolPop_t)(void *);
+
+  void* header = libfixing->header;
+  const uint32_t magic = *(uint32_t *)header;
+  // load commands start right after the header (4 bytes shorter when not magic64)
+  char *ptr = (char *)header;
+  if (magic == magic64) {
+    ptr += 0x20;
+  } else {
+    ptr += 0x20 - 0x4;
+  }
+
+  const uint32_t ncmds = *((uint32_t *)header + 4);
+
+  // this assumes that __TEXT comes before __DATA_CONST; slide stays 0 otherwise
+  uint64_t slide = 0;
+  for (uint32_t cmd_i = 0; cmd_i < ncmds; cmd_i++) {
+    const uint32_t cmd = *((uint32_t *)ptr + 0);
+    const uint32_t cmdsize = *((uint32_t *)ptr + 1);
+    if (cmd == LC_SEGMENT_64) {
+      char *name = (char *)((uint64_t *)ptr + 1);
+      uint64_t vmaddr = *((uint64_t *)ptr + 3);
+      // segname is a fixed 16-byte field, so never print past it
+      printf("segment %.16s\n", name);
+      if (custom_strcmp(name, "__TEXT") == 0) {
+        slide = (uint64_t)header - vmaddr;
+      } else if (custom_strcmp(name, "__DATA_CONST") == 0) {
+        uint64_t nsect = *((uint32_t *)ptr + 8*2);
+        char* sections_ptr = (char*)((uint32_t*)ptr + 18);
+        for (uint64_t sec = 0; sec < nsect; sec++) {
+          char* secname = sections_ptr;
+          // both section names of interest fill all 16 bytes of sectname,
+          // so there is no terminating NUL for a plain %s to stop at
+          printf("section %.16s\n", secname);
+          if (custom_strncmp(secname, "__objc_classbruh", 16) == 0) {
+            uint64_t addr = *((uint64_t*)sections_ptr + 4);
+            uint64_t size = *((uint64_t*)sections_ptr + 5);
+            uint64_t *data_ptr = (uint64_t*)(addr + slide);
+
+            readClass_t readClass = (readClass_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL9readClassP10objc_classbb");
+            realizeClassWithoutSwift_t realizeClassWithoutSwift = (realizeClassWithoutSwift_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL24realizeClassWithoutSwiftP10objc_classS0_");
+
+            // assumes find_in_symtab() yields null for a missing symbol -- TODO confirm
+            if (!readClass || !realizeClassWithoutSwift) {
+              printf("objc: libobjc symbol not found, skip %.16s\n", secname);
+            } else {
+              for (uint64_t ptr_i = 0; ptr_i < size / 8; ptr_i++) {
+                // this pointer is rebased by dyld and points to the correct class interface
+                // for some reason, we can skip this and it should still work
+                readClass((void*)data_ptr[ptr_i], false, false);
+                realizeClassWithoutSwift((void*)data_ptr[ptr_i], 0);
+              }
+            }
+          } else if (custom_strncmp(secname, "__objc_nlclsbruh", 16) == 0) {
+            uint64_t addr = *((uint64_t*)sections_ptr + 4);
+            uint64_t size = *((uint64_t*)sections_ptr + 5);
+            uint64_t *data_ptr = (uint64_t*)(addr + slide);
+
+            uint64_t* loadable_classes = (uint64_t*)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL16loadable_classes");
+            uint32_t* loadable_classes_allocated = (uint32_t*)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL26loadable_classes_allocated");
+            uint32_t* loadable_classes_used = (uint32_t*)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL21loadable_classes_used");
+
+            remapClass_t remapClass = (remapClass_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL10remapClassP10objc_class");
+            schedule_class_load_t schedule_class_load = (schedule_class_load_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL19schedule_class_loadP10objc_class");
+            realizeClassWithoutSwift_t realizeClassWithoutSwift = (realizeClassWithoutSwift_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL24realizeClassWithoutSwiftP10objc_classS0_");
+            addClassTableEntry_t addClassTableEntry = (addClassTableEntry_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__ZL18addClassTableEntryP10objc_classb");
+            sel_lookUpByName_t sel_lookUpByName = (sel_lookUpByName_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "_sel_lookUpByName");
+            objc_autoreleasePoolPush_t objc_autoreleasePoolPush = (objc_autoreleasePoolPush_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__objc_autoreleasePoolPush");
+            objc_autoreleasePoolPop_t objc_autoreleasePoolPop = (objc_autoreleasePoolPop_t)find_in_symtab(
+                "/usr/lib/libobjc.A.dylib", &cache, "__objc_autoreleasePoolPop");
+
+            // assumes find_in_symtab() yields null for a missing symbol -- TODO confirm
+            if (!loadable_classes || !loadable_classes_allocated || !loadable_classes_used ||
+                !remapClass || !schedule_class_load || !realizeClassWithoutSwift ||
+                !addClassTableEntry || !sel_lookUpByName ||
+                !objc_autoreleasePoolPush || !objc_autoreleasePoolPop) {
+              printf("objc: libobjc symbol not found, skip %.16s\n", secname);
+            } else {
+              // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3822
+              for (uint64_t ptr_i = 0; ptr_i < size / 8; ptr_i++) {
+                void* cls = remapClass((void*)data_ptr[ptr_i]);
+                if (!cls) continue;
+                // true = also register the metaclass; presumably the runtime's own default -- verify against objc4
+                addClassTableEntry(cls, true);
+                realizeClassWithoutSwift(cls, 0);
+                printf("build nonlazy class at (%llx)%p\n", (unsigned long long)data_ptr[ptr_i], cls);
+              }
+
+              // the two counters are read as 32-bit values; widen them explicitly for %llx
+              printf("loadable_classes %llx %llx %llx\n",
+                     (unsigned long long)*loadable_classes,
+                     (unsigned long long)*loadable_classes_used,
+                     (unsigned long long)*loadable_classes_allocated);
+              for (uint64_t ptr_i = 0; ptr_i < size / 8; ptr_i++) {
+                void* cls = remapClass((void*)data_ptr[ptr_i]);
+                schedule_class_load(cls);
+                printf("add class load (%llx)%p\n", (unsigned long long)data_ptr[ptr_i], cls);
+              }
+              printf("loadable_classes %llx\n", (unsigned long long)*loadable_classes);
+
+              void* pool = objc_autoreleasePoolPush();
+              {
+                struct loadable_class_t {
+                  void* cls;
+                  void* method;
+                };
+                struct loadable_class_t *classes = (struct loadable_class_t*)*loadable_classes;
+                int used = *loadable_classes_used;
+                *loadable_classes = 0;
+                *loadable_classes_allocated = 0;
+                *loadable_classes_used = 0;
+                void* sel = sel_lookUpByName("load");
+                printf("selector %p\n", sel);
+                // Call all +loads for the detached list.
+                // This loop needs its own index: sharing the load-command
+                // counter would make the outer loop lose its position.
+                for (int load_i = 0; load_i < used; load_i++) {
+                  void* cls = classes[load_i].cls;
+                  load_method_t load_method = (load_method_t)classes[load_i].method;
+                  printf("call load of class %p %p\n", cls, load_method);
+                  if (!cls) continue;
+                  (load_method)(cls, sel);
+                }
+                // Destroy the detached list.
+                if (classes) free(classes);
+              }
+              objc_autoreleasePoolPop(pool);
+              printf("loadable_classes %llx\n", (unsigned long long)*loadable_classes);
+            }
+          }
+          sections_ptr += 16 * 2 + 8 * 2 + 4 * 8;
+        }
+      }
+    }
+    ptr += cmdsize;
   }
 }
 