add fix for objc binaries

TODO: Fix call to +load() for non-lazy class
This commit is contained in:
nganhkhoa 2023-06-15 10:46:10 +07:00
parent ed793b1df6
commit 3aaa85520e
2 changed files with 238 additions and 8 deletions

View File

@ -1,8 +1,31 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
/// Test class deriving from NSObject with a single instance method and no
/// +load method. main() allocates one instance and sends it -bar.
@interface Foo : NSObject
/// Logs the receiver with NSLog.
- (void)bar;
@end

@implementation Foo
- (void)bar {
    NSLog(@"%@", self);
}
@end
/// Test class deriving from NSObject whose only member is a +load class method.
@interface Bar : NSObject
@end

@implementation Bar

/// Logs the receiving class object with NSLog.
+ (void)load {
    NSLog(@"%@", self);
}

@end
/// Subclass of Bar that declares no members of its own.
/// The interface is declared explicitly so the implementation below does not
/// rely on an implicit class declaration.
@interface Baz : Bar
@end

@implementation Baz
@end
/// Entry point of the test binary: logs a greeting, then creates a Foo and
/// sends it -bar inside an autorelease pool. Always returns 0.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSLog(@"Hello, World!");
        Foo *foo = [[Foo alloc] init];
        [foo bar];
    }
    return 0;
}

View File

@ -779,6 +779,7 @@ void build_cache(struct libcache& cache, void* main) {
} }
} }
void fix_objc(struct libcache_item* libfixing, struct libcache& cache);
void fix(struct libcache& cache) { void fix(struct libcache& cache) {
// now we have function to find exported symbols // now we have function to find exported symbols
// it supports full name search or hash search // it supports full name search or hash search
@ -812,6 +813,58 @@ void fix(struct libcache& cache) {
// //
// just an idea, if we can somehow reduce the datasize then it would be better // just an idea, if we can somehow reduce the datasize then it would be better
// OBJC:
// In Objective-C, the binary is loaded with the Objective-C runtime
// This runtime (a library) installs a hook on dyld for all images
// And because this runtime is a system runtime, the bootstrap step is already prepared
// The details on this runtime will be in a separate document, below are some basics
//
// The compiler for Objective-C emits a bunch of details for the runtime in the binary itself
// This information is stored in sections whose names are prefixed with __objc, namely
// - __objc_classlist
// - __objc_classrefs
// - __objc_selrefs
// - __objc_const
// - __objc_data
//
// Objective-C stores the class interface in the binary, particularly in __objc_data
// This interface contains the superclass, metaclass, and a cache of method pointers
// This information is either bound (by dyld) or built (by the Objective-C runtime)
//
// One of the important routines in the Objective-C runtime is readClass.
// https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3385
//
// This function is not exported, however there is an entry in the symtab.
// By using this entry, we can find its address
//
// Because __objc_data contains to-be-bound values,
// which will be resolved by dyld and referenced by the Objective-C runtime later,
// if we simply erase these values, the runtime reads the erased reference(s) and crashes
// (through debugging, we know that the crash happens in readClass, realizeClassWithoutSwift)
//
// However, we can evade this by making the runtime think there is no class that needs setup
// This can be done by renaming the __objc_classlist section or removing it,
// because the runtime finds the __objc_classlist section by name, and the size of the section
// is used to iterate through the pointers.
// So if we change the name, the runtime will have no class to set up.
// Alternatively, remove the section completely and call the setup ourselves, because we know where the data is
//
// The setup is done through the readClass function; as said above, its address can be found
// This function is a pure C function, so calling into it is easy
//
// Important function with their names:
// _readClass(objc_class*, bool, bool)
// mangled: __ZL9readClassP10objc_classbb
//
// _realizeClassWithoutSwift(objc_class*, objc_class*)
// mangled: __ZL24realizeClassWithoutSwiftP10objc_classS0_
//
// _remapClass(objc_class*)
// mangled: __ZL10remapClassP10objc_class
//
// _addClassTableEntry(objc_class*, bool)
// mangled: __ZL18addClassTableEntryP10objc_classb
// NOTES: // NOTES:
// mach_task_self() has a conflicting symbol or something, // mach_task_self() has a conflicting symbol or something,
// in symbol table it's: _mach_task_self_ // in symbol table it's: _mach_task_self_
@ -826,8 +879,6 @@ void fix(struct libcache& cache) {
uint32_t libsystem_hash = uint32_t libsystem_hash =
calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib");
// this is just for now,
// in the future we will self fix our lib so our lib also has no import
typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int); typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int);
typedef void *(*mach_task_self_t)(); typedef void *(*mach_task_self_t)();
mach_task_self_t mach_task_self_func = mach_task_self_t mach_task_self_func =
@ -845,7 +896,6 @@ void fix(struct libcache& cache) {
printf("there is no imports to fix\n"); printf("there is no imports to fix\n");
} }
struct libcache_item* libfixing = get_libcache_with_name(&cache, lib_to_resolve); struct libcache_item* libfixing = get_libcache_with_name(&cache, lib_to_resolve);
// print_macho_summary(libfixing->header);
for (int i = 0; i < nimports; i++) { for (int i = 0; i < nimports; i++) {
struct imported_symbol symbol = imported_table[i]; struct imported_symbol symbol = imported_table[i];
uint64_t fix_at = symbol.offset + libfixing->segment[symbol.segment_i]; uint64_t fix_at = symbol.offset + libfixing->segment[symbol.segment_i];
@ -886,6 +936,163 @@ void fix(struct libcache& cache) {
// uint64_t start_page = page_rw_fixed[j]; // uint64_t start_page = page_rw_fixed[j];
// vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, VM_PROT_READ); // vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, VM_PROT_READ);
// } // }
fix_objc(libfixing, cache);
}
// Manually performs, for the image described by `libfixing`, the class setup
// that is otherwise driven from the image's Objective-C class-list sections.
//
// The Mach-O load commands of libfixing->header are walked; inside the
// __DATA_CONST segment two sections are looked up by their (renamed) names:
//   - "__objc_classbruh": every pointer is handed to readClass() and then to
//     realizeClassWithoutSwift().
//   - "__objc_nlclsbruh": every pointer is remapped, added to the class table
//     and realized; the classes are then scheduled for +load, the resulting
//     loadable_classes list is detached, and each +load is invoked.
// All runtime entry points are resolved in libobjc through find_in_symtab().
//
// libfixing: cache entry of the image being fixed; only ->header is read.
// cache:     library cache used to resolve the libobjc symbols.
void fix_objc(struct libcache_item* libfixing, struct libcache& cache) {
    // Manually run the Objective-C runtime for each class
    //
    // use the snippet below to call a class method,
    // because often the this pointer is stored in a different register,
    // so that register needs to be loaded before calling the function
    //
    // void* foo = (void*)function_to_call;
    // asm("movq %0, %%r12"::"r"(foo));
    // __asm__(".intel_syntax noprefix;"
    //         "mov rcx, 123;"
    //         "call r12;");
    typedef void *(*readClass_t)(void *, bool, bool);
    typedef void *(*realizeClassWithoutSwift_t)(void *, void *);
    typedef void *(*remapClass_t)(void *);
    typedef void *(*load_method_t)(void *, void *);
    typedef void *(*sel_lookUpByName_t)(const char *);
    // The mangled name (...P10objc_classb) shows a trailing bool parameter,
    // so it must be part of the signature and passed explicitly.
    typedef void (*addClassTableEntry_t)(void *, bool);
    typedef void (*schedule_class_load_t)(void *);
    typedef void *(*objc_autoreleasePoolPush_t)();
    typedef void (*objc_autoreleasePoolPop_t)(void *);

    // Element type used to read the runtime's loadable_classes array.
    struct loadable_class_t {
        void* cls;
        void* method;
    };

    void* header = libfixing->header;
    const uint32_t magic = *(uint32_t *)header;

    // Load commands start right after the mach header
    // (0x20 bytes when magic == magic64, 4 bytes less otherwise).
    char *ptr = (char *)header;
    if (magic == magic64) {
        ptr += 0x20;
    } else {
        ptr += 0x20 - 0x4;
    }
    const uint32_t ncmds = *((uint32_t *)header + 4);

    // Slide is derived from the __TEXT segment; section addresses cannot be
    // translated until it is known.
    uint64_t slide = 0;
    bool slide_known = false;

    for (uint32_t cmd_i = 0; cmd_i < ncmds; cmd_i++) {
        char *lc = ptr;
        const uint32_t cmd = *((uint32_t *)lc + 0);
        const uint32_t cmdsize = *((uint32_t *)lc + 1);
        // Advance first so that every `continue` below moves to the next command.
        ptr += cmdsize;

        if (cmd != LC_SEGMENT_64) continue;

        char *name = (char *)((uint64_t *)lc + 1);
        uint64_t vmaddr = *((uint64_t *)lc + 3);
        // Name fields are fixed 16-byte arrays and need not be NUL-terminated.
        printf("segment %.16s\n", name);

        if (custom_strcmp(name, "__TEXT") == 0) {
            slide = (uint64_t)header - vmaddr;
            slide_known = true;
            continue;
        }
        if (custom_strcmp(name, "__DATA_CONST") != 0) continue;

        // this assumes that __TEXT comes before __DATA_CONST
        if (!slide_known) {
            printf("fix_objc: __DATA_CONST found before __TEXT, slide unknown\n");
            continue;
        }

        uint64_t nsect = *((uint32_t *)lc + 8*2);
        char* sections_ptr = (char*)((uint32_t*)lc + 18);
        for (uint64_t sec = 0; sec < nsect;
             sec++, sections_ptr += 16 * 2 + 8 * 2 + 4 * 8) {
            char* secname = sections_ptr;
            printf("section %.16s\n", secname);

            if (custom_strncmp(secname, "__objc_classbruh", 16) == 0) {
                uint64_t addr = *((uint64_t*)sections_ptr + 4);
                uint64_t size = *((uint64_t*)sections_ptr + 5);
                uint64_t *data_ptr = (uint64_t*)(addr + slide);

                readClass_t readClass = (readClass_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL9readClassP10objc_classbb");
                realizeClassWithoutSwift_t realizeClassWithoutSwift = (realizeClassWithoutSwift_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL24realizeClassWithoutSwiftP10objc_classS0_");

                // NOTE(review): assumes find_in_symtab yields NULL on a failed
                // lookup - confirm against its definition.
                if (!readClass || !realizeClassWithoutSwift) {
                    printf("fix_objc: missing libobjc symbol(s) for class list\n");
                    continue;
                }

                for (uint64_t ptr_i = 0; ptr_i < size / 8; ptr_i++) {
                    // this pointer is rebased by dyld and points to the correct class interface
                    // for some reason, we can skip this and it should still work
                    readClass((void*)data_ptr[ptr_i], false, false);
                    realizeClassWithoutSwift((void*)data_ptr[ptr_i], 0);
                }
            }
            else if (custom_strncmp(secname, "__objc_nlclsbruh", 16) == 0) {
                uint64_t addr = *((uint64_t*)sections_ptr + 4);
                uint64_t size = *((uint64_t*)sections_ptr + 5);
                uint64_t *data_ptr = (uint64_t*)(addr + slide);

                uint64_t* loadable_classes = (uint64_t*)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL16loadable_classes");
                uint32_t* loadable_classes_allocated = (uint32_t*)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL26loadable_classes_allocated");
                uint32_t* loadable_classes_used = (uint32_t*)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL21loadable_classes_used");
                remapClass_t remapClass = (remapClass_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL10remapClassP10objc_class");
                schedule_class_load_t schedule_class_load = (schedule_class_load_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL19schedule_class_loadP10objc_class");
                realizeClassWithoutSwift_t realizeClassWithoutSwift = (realizeClassWithoutSwift_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL24realizeClassWithoutSwiftP10objc_classS0_");
                addClassTableEntry_t addClassTableEntry = (addClassTableEntry_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__ZL18addClassTableEntryP10objc_classb");
                sel_lookUpByName_t sel_lookUpByName = (sel_lookUpByName_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "_sel_lookUpByName");
                objc_autoreleasePoolPush_t objc_autoreleasePoolPush = (objc_autoreleasePoolPush_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__objc_autoreleasePoolPush");
                objc_autoreleasePoolPop_t objc_autoreleasePoolPop = (objc_autoreleasePoolPop_t)find_in_symtab(
                    "/usr/lib/libobjc.A.dylib", &cache, "__objc_autoreleasePoolPop");

                if (!loadable_classes || !loadable_classes_allocated || !loadable_classes_used ||
                    !remapClass || !schedule_class_load || !realizeClassWithoutSwift ||
                    !addClassTableEntry || !sel_lookUpByName ||
                    !objc_autoreleasePoolPush || !objc_autoreleasePoolPop) {
                    printf("fix_objc: missing libobjc symbol(s) for non-lazy class list\n");
                    continue;
                }

                // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3822
                for (uint64_t ptr_i = 0; ptr_i < size / 8; ptr_i++) {
                    void* cls = remapClass((void*)data_ptr[ptr_i]);
                    if (!cls) continue;
                    // NOTE(review): `true` is believed to match the runtime's
                    // default for the bool parameter - confirm against objc4.
                    addClassTableEntry(cls, true);
                    realizeClassWithoutSwift(cls, 0);
                    printf("build nonlazy class at (%llx)%p\n",
                           (unsigned long long)data_ptr[ptr_i], cls);
                }

                printf("loadable_classes %llx %llx %llx\n",
                       (unsigned long long)*loadable_classes,
                       (unsigned long long)*loadable_classes_used,
                       (unsigned long long)*loadable_classes_allocated);

                for (uint64_t ptr_i = 0; ptr_i < size / 8; ptr_i++) {
                    void* cls = remapClass((void*)data_ptr[ptr_i]);
                    schedule_class_load(cls);
                    printf("add class load (%llx)%p\n",
                           (unsigned long long)data_ptr[ptr_i], cls);
                }
                printf("loadable_classes %llx\n", (unsigned long long)*loadable_classes);

                void* pool = objc_autoreleasePoolPush();
                {
                    // Detach the pending list before calling into user code.
                    struct loadable_class_t *classes = (struct loadable_class_t*)*loadable_classes;
                    int used = *loadable_classes_used;
                    *loadable_classes = 0;
                    *loadable_classes_allocated = 0;
                    *loadable_classes_used = 0;

                    void* sel = sel_lookUpByName("load");
                    printf("selector %p\n", sel);

                    // Call all +loads for the detached list.
                    // The index is local on purpose: it must not touch the
                    // load-command counter of the enclosing loop.
                    for (int load_i = 0; load_i < used; load_i++) {
                        void* cls = classes[load_i].cls;
                        load_method_t load_method = (load_method_t)classes[load_i].method;
                        printf("call load of class %p %p\n", cls, load_method);
                        if (!cls) continue;
                        (load_method)(cls, sel);
                    }

                    // Destroy the detached list.
                    if (classes) free(classes);
                }
                objc_autoreleasePoolPop(pool);
                printf("loadable_classes %llx\n", (unsigned long long)*loadable_classes);
            }
        }
    }
}