diff --git a/research/strings_empty/restore.cc b/research/strings_empty/restore.cc index 49702eb..b629db5 100644 --- a/research/strings_empty/restore.cc +++ b/research/strings_empty/restore.cc @@ -10,11 +10,6 @@ #include "out/restore.h" -char *pwd; -uint32_t pwd_len; -clock_t start, end; -#define ISARM(header) ((*((uint32_t *)(header)+1) & 0xff) == 0xc) - int custom_strcmp(const char *p1, const char *p2) { const unsigned char *s1 = (const unsigned char *)p1; const unsigned char *s2 = (const unsigned char *)p2; @@ -42,766 +37,9 @@ int custom_strncmp(const char *s1, const char *s2, register size_t n) { return 0; } -void set_cwd(const char *const *envp) { - while (*envp) { - // PWD= - if (0x3d445750 == *(uint32_t *)(*envp)) { - break; - } - envp++; - } - pwd = (char *)(*envp + 4); - for (; pwd[pwd_len] != 0; pwd_len++) - ; -} - const uint32_t magic64 = 0xfeedfacf; const uint32_t magic32 = 0xfeedface; -struct libcache_item { - void *header; - void *trie; - uint32_t trie_size; - uint32_t hash; - - uint64_t slide; - - // pointer to segment address - uint32_t nsegment; - uint64_t *segment; -}; - -struct libcache { - struct libcache_item *libs; - uint32_t size; - - void *main; - void *thislib; - void *libdyld; - - int nrpath; - char **rpaths; -}; - -uint32_t fnv_hash_extend(const char *str, uint32_t h) { - unsigned char *s = (unsigned char *)str; /* unsigned string */ - - /* See the FNV parameters at www.isthe.com/chongo/tech/comp/fnv/#FNV-param */ - const uint32_t FNV_32_PRIME = 0x01000193; /* 16777619 */ - - // uint32_t h = 0x811c9dc5; /* 2166136261 */x - while (*s != 0) { - /* xor the bottom with the current octet */ - h ^= *s++; - /* multiply by the 32 bit FNV magic prime mod 2^32 */ - h *= FNV_32_PRIME; - } - - return h; -} -uint32_t fnv_hash(const char *str) { return fnv_hash_extend(str, 0x811c9dc5); } - -// try these hashes -// https://gist.github.com/sgsfak/9ba382a0049f6ee885f68621ae86079b - -// calculate the hash to search -// _dyld_get_image_name returns the full path to the library -// while the static path in LC_DYLIB (and such) could be relative -// we should expand the path to fullpath to correctly compute the hash -// -// the hardest part is the @rpath, because there can be many LC_RPATH -// and @rpath can also reference @loader_path -uint32_t calculate_libname_hash(const libcache *cache, const char *name) { - uint32_t hash; - uint32_t (*hash_func)(const char *) = fnv_hash; - if (name[0] == '.') { - // resolve relative path with ./ ../ ../../ and so on - char *p = realpath(name, 0); - hash = hash_func(p); - free(p); - } else if (name[0] == '@') { - // TODO: resolve @rpath - // ohyeah this is gonna be wild - // loop through all rpath and resolve that rpath - // then resolve the full path for all rpath - // - // which rpath is correct can be done by checking if the cache has that hash - for (int i = 0; i < cache->nrpath; i++) { - char *rpath = cache->rpaths[i]; - char *p = realpath(rpath, 0); - hash = hash_func(p); - hash = fnv_hash_extend(&name[6], hash); - for (size_t j = 0; j < cache->size; j++) { - if (cache->libs[j].hash == hash) { - free(p); - return hash; - } - } - free(p); - } - // printf("resolver for @rpath is not supported yet\n"); - } else { - hash = hash_func(name); - } - return hash; -} - -// dummy no sus function to look for dyld header -// i don't know if dyld_stub_binder should be better -// because if they are not familiar with dyld -// they would not suspect dyld_stub_binder inside modern macho -// Added iOS 6, macOS 10.8 -extern "C" uint32_t dyld_get_sdk_version(const mach_header *mh); -void exported_from_c(); - -void decode_uleb128(char *&addr, uint32_t *ret) { - uint32_t result = 0; - int shift = 0; - - while (1) { - unsigned char byte = *(unsigned char *)(addr); - addr++; - - result |= (byte & 0x7f) << shift; - shift += 7; - - if (!(byte & 0x80)) - break; - } - - *ret = result; -} - -void *find_header(void *_func) { - // Approach 1: (not stable) - // we assume that text section is small enough to fit on 1 page - // so the header should stay at the top of the page due to allocation logic - // the slice/slide is random but always align 0x1000 so we test a few values - // to see if the magic value is found - // - // Guaranteed to stop, but search range is small - - // const uint64_t page_size = 0x4000; - // uint64_t func = (uint64_t)_func; - // uint64_t potential_head = func + (0x4000 - (func % page_size)); - // void* head = 0; - // for (uint64_t i = 0x1000; i < 0xf000; i+=0x1000) { - // uint32_t* x = (uint32_t*)(potential_head - i); - // if (*x == magic64 || *x == magic32) { - // head = (void*)x; - // break; - // } - // } - // return head; - - // Approach 2: (more stable) - // We know that the header is 0x1000 aligned, - // just loop until the magic value is found - // Using while loop so ¯\_(ツ)_/¯ - const uint64_t page_size = 0x1000; - uint64_t func = (uint64_t)_func; - uint64_t potential_head = func + (0x1000 - (func % page_size)); - - void *head = 0; - uint32_t *x = (uint32_t *)(potential_head); - while (*x != magic64 && *x != magic32) { - x -= 0x1000 / 4; - } - return (void *)x; -} - -uint64_t get_slide(const void *header) { - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - uint64_t slice = 0; - const uint32_t ncmds = *((uint32_t *)header + 4); - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - if (custom_strcmp(name, "__TEXT") == 0) { - slice = (uint64_t)header - vmaddr; - return slice; - } - } - ptr += cmdsize; - } - return 0; -} - -void *get_selfbind(const void *header) { - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - - uint64_t slide; - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - // this assumes that __TEXT comes before __DATA_CONST - if (custom_strcmp(name, "__TEXT") == 0) { - slide = (uint64_t)header - vmaddr; - } else if (custom_strcmp(name, "__DATA") == 0) { - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - sections_ptr += (16 * 2 + 8 * 2 + 4 * 8) * (nsect - 1); - - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - if (custom_strcmp(secname, "selfbind") == 0) { - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint32_t *data_ptr = (uint32_t *)(addr + slide); - return (void *)data_ptr; - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } - } - ptr += cmdsize; - } - return 0; -} - -void print_macho_summary(const void *header) { - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - uint64_t linkedit_vmaddr; - uint64_t linkedit_fileoffset; - uint64_t slide; - printf("parsing macho at %p\n", header); - printf("ncmds %x\n", ncmds); - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - printf(" cmd %x %x\n", cmd, cmdsize); - if (cmd == LC_DYLD_EXPORTS_TRIE) { - const uint32_t offset = *((uint32_t *)ptr + 2); - const uint32_t size = *((uint32_t *)ptr + 3); - printf(" export trie: offset=0x%x size=0x%x\n", offset, size); - } - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t vmsize = *((uint64_t *)ptr + 4); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - uint64_t filesize = *((uint64_t *)ptr + 6); - if (custom_strcmp(name, "__TEXT") == 0) { - slide = (uint64_t)header - vmaddr; - printf(" --- slide=0x%llx ---\n", slide); - } else if (custom_strcmp(name, "__LINKEDIT") == 0) { - linkedit_vmaddr = vmaddr; - linkedit_fileoffset = fileoffset; - } - printf(" Segment %s\n", name); - printf(" vmaddr=0x%llx fileoffset=0x%llx\n", vmaddr, fileoffset); - printf(" vmsize=0x%llx filesize=0x%llx\n", vmsize, filesize); - - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint32_t fileoffset = *((uint32_t *)sections_ptr + 6 * 2); - printf(" Section %s\n", sections_ptr); - printf(" addr=0x%llx size=0x%llx fileoffset=0x%x\n", addr, size, - fileoffset); - } - } - if (cmd == LC_SYMTAB) { - uint32_t symoff = *((uint32_t *)ptr + 2); - uint32_t nsym = *((uint32_t *)ptr + 3); - uint32_t stroff = (*((uint32_t *)ptr + 4)); - uint32_t strsize = *((uint32_t *)ptr + 5); - - struct symbol_t { - uint32_t strx; - uint8_t flags; - uint8_t sect; - uint16_t desc; - uint64_t value; - }; - - uint64_t symtab_start = - (uint64_t)symoff - linkedit_fileoffset + slide + linkedit_vmaddr; - uint64_t stroff_start = - (uint64_t)stroff - linkedit_fileoffset + slide + linkedit_vmaddr; - - printf(" symtab: offset=0x%x nsym=0x%x\n", symoff, nsym); - for (int j = 0; j < nsym; j++) { - struct symbol_t *symtab = (struct symbol_t *)symtab_start; - struct symbol_t symbol = symtab[j]; - char *name = (char *)stroff_start + symbol.strx; - printf(" %s %llx => %p\n", name, symbol.value, - (void *)(symbol.value + slide)); - } - } - if (cmd == LC_REEXPORT_DYLIB) { - uint32_t name_offset = *((uint32_t *)ptr + 2); - char *name = (char *)ptr + name_offset; - printf(" reexport lib %s\n", name); - } - ptr += cmdsize; - } -} - -void *get_export_trie(const void *header, uint32_t &size) { - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - uint64_t slice = 0; - uint64_t linkedit_vmaddr = 0; - uint64_t linkedit_fileoffset = 0; - const uint32_t ncmds = *((uint32_t *)header + 4); - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_DYLD_EXPORTS_TRIE) { - const uint32_t offset = *((uint32_t *)ptr + 2); - size = *((uint32_t *)ptr + 3); - uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset; - return (void *)(linkedit_vmaddr + slice + offset_in_linkedit); - } - if (cmd == LC_DYLD_INFO_ONLY) { - const uint32_t offset = *((uint32_t *)ptr + 10); - size = *((uint32_t *)ptr + 11); - uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset; - return (void *)(linkedit_vmaddr + slice + offset_in_linkedit); - } - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - if (custom_strcmp(name, "__TEXT") == 0) { - slice = (uint64_t)header - vmaddr; - } else if (custom_strcmp(name, "__LINKEDIT") == 0) { - linkedit_vmaddr = vmaddr; - linkedit_fileoffset = fileoffset; - } - } - ptr += cmdsize; - } - return 0; -} - -uint32_t should_follow_symbol(char *&buffer, char *&_find) { - // printf("follow check %s has prefix: %s\n", _find, buffer); - char *find = _find; - char is_prefix = true; - while (1) { - int find_end = *find == 0; - int buffer_end = *buffer == 0; - int check = *buffer == *find; - // printf("check is %x == %x\n", *buffer, *find); - - if (buffer_end) { - // we must always run to the end of buffer, marked 0x00 - buffer++; - break; - } - if (find_end) { - // symbol to find is shorter than current buffer string - // but we still need to run to the end of buffer - // so just set not prefix - is_prefix = false; - } - if (!check) { - is_prefix = false; - } - buffer++; - find++; - } - // only move forward if is_prefix - if (is_prefix) { - _find = find; - // printf("prefix is found\n"); - } - return is_prefix; -} - -void *find_in_export_trie(const void *header, void *trie, char *&symbol) { - uint32_t func = 0; - - char *ptr = (char *)trie; - char *find = (char *)symbol; - while (1) { - // terminal node will have data - uint32_t data_count = 0; - decode_uleb128(ptr, &data_count); - if (data_count != 0 && *find == 0) { - // printf("reached terminal node\n"); - break; - } else if (data_count) { - // still need to follow the branch - ptr += data_count; - } - char num_child = ptr[0]; - ptr++; - - int still_following = 0; - for (char i = 0; i < num_child; i++) { - still_following = should_follow_symbol(ptr, find); - uint32_t follow_offset; - decode_uleb128(ptr, &follow_offset); - if (still_following) { - ptr = (char *)trie + follow_offset; - break; - } - } - - if (!still_following) { - // symbol not found - return 0; - } - } - - char count = *(ptr - 1); - uint8_t flag = *ptr++; // flags - // uleb128 offset - decode_uleb128(ptr, &func); - - if (flag == 0x8 /*re-export*/) { - // this hits a re-export symbol but with another name - // usually, the re-export is the same name on another library - // but somehow, for system libraries, a lot of symbols are - // renamed and re-exported from another library - // probably this was to build wrappers and - // have custom platform optimizations - // - // example of these is _strlen in libsystem_c.dylib - // is re-exported from __platform_strlen in libplatform - // - // The purpose of using char*& is to change the symbol searching - // to another symbols and do it quickly using references (pointer) - // - // we return 0 so the dlsym continues to search, - // but with another symbol name because the symbol points to another string - symbol = ptr; - return 0; - } - return (void *)((char *)header + func); -} - -void *find_in_lib(struct libcache *cache, struct libcache_item *lib, - char *&symbol); - -void *find_in_reexport(struct libcache *cache, struct libcache_item *lib, - char *&symbol) { - void *header = lib->header; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd != LC_REEXPORT_DYLIB) { - ptr += cmdsize; - continue; - } - uint32_t name_offset = *((uint32_t *)ptr + 2); - char *name = (char *)ptr + name_offset; - uint32_t hash = calculate_libname_hash(cache, name); - for (int j = 0; j < cache->size; j++) { - struct libcache_item *reexport = &cache->libs[j]; - if (reexport->hash != hash) { - continue; - } - void *found = find_in_lib(cache, reexport, symbol); - if (found) - return found; - } - ptr += cmdsize; - } - return 0; -} - -void *find_in_lib(struct libcache *cache, struct libcache_item *lib, - char *&symbol) { - void *direct = find_in_export_trie(lib->header, lib->trie, symbol); - if (direct) { - return direct; - } - // cannot find in directly exported trie, loop through all reexport libs - return find_in_reexport(cache, lib, symbol); -} - -// the current logic of dlsym is not correct, but it works for PoC -// -// dlsym searchs and match libraries based on the LC_DYLD_ID load command -// while for our PoC, we use the paths of libraries to search for them -// -// for performance reasons, we do not compare the paths as strings -// we instead use a simple hash to carry out comparision -// using hashes allows us to compare integers and would be faster -void *custom_dlsym(struct libcache *cache, uint32_t hash, const char *symbol) { - for (size_t i = 0; i < cache->size; i++) { - struct libcache_item *cache_lib = &cache->libs[i]; - if (cache_lib->hash == hash) { - // read find_in_export_trie comments to know the use of char*& - // - // this code is for when the symbol searching references - // a previous item in search chain - // - // For example: - // searching for X in [A, B, C], - // C has X but it is a re-export from B with the name Y - // then we have to perform a search again from the top - // but with symbol Y - char **symbol_copy = (char **)&symbol; - void *func = find_in_lib(cache, cache_lib, *symbol_copy); - if (*symbol_copy != symbol) { - func = find_in_lib(cache, cache_lib, *symbol_copy); - } - return func; - } - } - return 0; -} - -void *custom_dlsym(struct libcache *cache, const char *libname, - const char *symbol) { - uint32_t hash = calculate_libname_hash(cache, libname); - return custom_dlsym(cache, hash, symbol); -} - -void bootstrap_libcache_item(struct libcache_item *item, const void *header, - const char *name) { - item->header = (void *)header; - item->trie = get_export_trie(header, item->trie_size); - - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - if (custom_strcmp(name, "__TEXT") == 0) { - uint64_t vmaddr = *((uint64_t *)ptr + 3); - item->slide = (uint64_t)header - vmaddr; - } - item->nsegment++; - } - ptr += cmdsize; - } - - ptr = command_ptr; - item->segment = (uint64_t *)malloc(sizeof(uint64_t) * item->nsegment); - for (int i = 0, segment_i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64) { - uint64_t vmaddr = *((uint64_t *)ptr + 3); - item->segment[segment_i++] = (vmaddr + item->slide); - } - ptr += cmdsize; - } - return; -} - -struct libcache_item *get_libcache_with_name(struct libcache *cache, - const char *name) { - void *to_find = 0; - if (custom_strcmp(name, "main") == 0) { - to_find = cache->main; - } else if (custom_strcmp(name, "thislib") == 0) { - to_find = cache->thislib; - } - uint32_t hash = calculate_libname_hash(cache, name); - for (int i = 0; i < cache->size; i++) { - struct libcache_item *cache_lib = &cache->libs[i]; - // search by hash or by pointer for special case - if (cache_lib->hash == hash || cache_lib->header == to_find) { - return cache_lib; - } - } - return 0; -} - -void dump_export_trie(const void *trie, uint32_t size, const char *filename) { - FILE *outfile = fopen(filename, "wb"); - fwrite((char *)trie, size, 1, outfile); - fclose(outfile); -} - -void dump_export_trie_of(const char *libname, const libcache *cache, - const char *filename) { - uint32_t hash = calculate_libname_hash(cache, libname); - for (int i = 0; i < cache->size; i++) { - struct libcache_item cache_lib = cache->libs[i]; - if (cache_lib.hash == hash) { - return dump_export_trie(cache_lib.trie, cache_lib.trie_size, filename); - } - } -} - -void *find_in_symtab(const libcache_item *lib, const char *find) { - void *header = lib->header; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - - uint64_t linkedit_vmaddr; - uint64_t linkedit_fileoffset; - uint64_t slide; - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SYMTAB) { - uint32_t symoff = *((uint32_t *)ptr + 2); - uint32_t nsym = *((uint32_t *)ptr + 3); - uint32_t stroff = (*((uint32_t *)ptr + 4)); - uint32_t strsize = *((uint32_t *)ptr + 5); - - struct symbol_t { - uint32_t strx; - uint8_t flags; - uint8_t sect; - uint16_t desc; - uint64_t value; - }; - - uint64_t symtab_start = - (uint64_t)symoff - linkedit_fileoffset + slide + linkedit_vmaddr; - uint64_t stroff_start = - (uint64_t)stroff - linkedit_fileoffset + slide + linkedit_vmaddr; - - for (int j = 0; j < nsym; j++) { - struct symbol_t *symtab = (struct symbol_t *)symtab_start; - struct symbol_t symbol = symtab[j]; - char *name = (char *)stroff_start + symbol.strx; - if (custom_strcmp(name, find) == 0) { - return (void *)(symbol.value + slide); - } - } - break; - } - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - if (custom_strcmp(name, "__TEXT") == 0) { - slide = (uint64_t)header - vmaddr; - } else if (custom_strcmp(name, "__LINKEDIT") == 0) { - linkedit_vmaddr = vmaddr; - linkedit_fileoffset = fileoffset; - } - } - ptr += cmdsize; - } - return 0; -} - -void *find_in_symtab(const char *libname, const libcache *cache, - const char *find) { - uint32_t hash = calculate_libname_hash(cache, libname); - struct libcache_item *cache_lib = 0; - for (int i = 0; i < cache->size; i++) { - if (cache->libs[i].hash == hash) { - cache_lib = &(cache->libs[i]); - break; - } - } - return find_in_symtab(cache_lib, find); -} -int hook_printf(const char *format, ...) { - va_list args; - va_start(args, format); - - printf("HOOKED BEGIN LOL\n"); - int status = printf(format, args); - printf("HOOKED END LOL\n"); - - va_end(args); - return status; -} - -typedef void *(*readClass_t)(void *, bool, bool); -typedef void *(*realizeClassWithoutSwift_t)(void *, void *); -typedef void *(*remapClass_t)(void *); -typedef void *(*load_method_t)(void *, void *); -typedef void *(*sel_lookUpByName_t)(const char *); -typedef void (*addClassTableEntry_t)(void *); -typedef void (*schedule_class_load_t)(void *); - -typedef void *(*objc_autoreleasePoolPush_t)(); -typedef void (*objc_autoreleasePoolPop_t)(void *); - -struct custom_initializer_t { - // used for Objective-C load methods - uint64_t *loadable_classes; - uint32_t *loadable_classes_used; - sel_lookUpByName_t sel_lookUpByName; - objc_autoreleasePoolPush_t objc_autoreleasePoolPush; - objc_autoreleasePoolPop_t objc_autoreleasePoolPop; - remapClass_t remapClass; - schedule_class_load_t schedule_class_load; - uint64_t *cls; - size_t ncls; - // used for constructors - void *programvars; - uint64_t *constructors; - size_t nconstructors; -}; - -// global variable for PoC -struct custom_initializer_t *custom_initializer_i; - struct ProgramVars { void *mh; // mach_header or mach_header64 int *NXArgcPtr; @@ -810,21 +48,13 @@ struct ProgramVars { const char **__prognamePtr; }; -void build_cache(struct libcache &cache, void *main); -void fix(struct libcache &cache); -void find_all_rpath(struct libcache &cache, void *main); - -void test(struct libcache &cache); - -void restore_strings(void); +void restore_strings(void* main); __attribute__((constructor)) static void bruh(int argc, const char *const argv[], const char *const envp[], const char *const apple[], const struct ProgramVars *vars) { - start = clock(); - printf("=== rebuilding the strings ===\n"); - - restore_strings(); + void* main = (void *)(vars->mh); + restore_strings(main); } /// strings in __TEXT,__cstring has been removed and this @@ -866,87 +96,10 @@ bruh(int argc, const char *const argv[], const char *const envp[], /// we only move the section entry. /// /// Remember to update the command count in macho header (+1). -void restore_strings(void) { -} +void restore_strings(void* main) { + printf("=== rebuilding the strings ===\n"); -void build_cache(struct libcache &cache, void *main) { - const uint64_t main_slide = get_slide(main); - // Find our lib (mapped) file - const void *thislib = find_header((void *)bruh); - // Find dyld lib (mapped) file using a no-sus function - const void *libdyld = find_header((void *)dyld_get_sdk_version); - - cache.main = (void *)main; - cache.thislib = (void *)thislib; - cache.libdyld = (void *)libdyld; - uint32_t libsystem_hash = - calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); - - // From libdyld header, we can list exports table - // to find all function we want to use - // - // This way there is no leakage of functions we use to do our trick - // mostly to hide - // - _dyld_image_count - // - _dyld_get_image_name - // - _dyld_get_image_header - // - _dyld_get_image_vmaddr_slide - - // The above functions are crucial to find all libraries loaded - // From which we will traverse the exports table to replace - // _got and _la_symbol_pointer data - - // Our lib can hide more details too - // We can resolve all functions we use - // before resolving the main executable imports - // - // This will make our lib use only dyld_get_sdk_version - // For the main executable, imports are empty due to manual resolve - - printf("executable header at %p\n", main); - printf("lib header at %p\n", thislib); - printf("libdyld header at %p\n", libdyld); - - find_all_rpath(cache, main); - uint32_t trie_size; - void *libdyld_export_trie = get_export_trie(libdyld, trie_size); - - // we have to traverse the trie to find these symbols - // because if we self-rebuild import table for **this** lib, - // these symbols aren't resolved - // so we have to resolve ourselves and then rebuild the symbols for others - typedef int (*dyld_image_count_t)(void); - typedef char *(*dyld_get_image_name_t)(int); - typedef void *(*dyld_get_image_header_t)(int); - - char *dyld_image_count_s = (char *)"__dyld_image_count"; - int (*dyld_image_count_func)(void) = (dyld_image_count_t)find_in_export_trie( - libdyld, libdyld_export_trie, dyld_image_count_s); - - char *dyld_get_image_header_s = (char *)"__dyld_get_image_header"; - void *(*dyld_get_image_header_func)(int) = - (dyld_get_image_header_t)find_in_export_trie(libdyld, libdyld_export_trie, - dyld_get_image_header_s); - - char *dyld_get_image_name_s = (char *)"__dyld_get_image_name"; - char *(*dyld_get_image_name_func)(int) = - (dyld_get_image_name_t)find_in_export_trie(libdyld, libdyld_export_trie, - dyld_get_image_name_s); - - cache.size = dyld_image_count_func(); - cache.libs = - (struct libcache_item *)malloc(sizeof(struct libcache_item) * cache.size); - for (int i = 0; i < cache.size; i++) { - void *header = dyld_get_image_header_func(i); - char *name = dyld_get_image_name_func(i); - bootstrap_libcache_item(&cache.libs[i], header, name); - cache.libs[i].hash = calculate_libname_hash(&cache, name); - // printf("%p %s\n", header, name); - } -} - -// Function to find all rpath entries of the main executable -void find_all_rpath(struct libcache &cache, void *header) { + void* header = main; const uint32_t magic = *(uint32_t *)header; char *ptr = (char *)header; if (magic == magic64) { @@ -954,449 +107,11 @@ void find_all_rpath(struct libcache &cache, void *header) { } else { ptr += 0x20 - 0x4; } - printf("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"); const uint32_t ncmds = *((uint32_t *)header + 4); - printf("RPATHS:\n"); - cache.nrpath = 0; - for (uint32_t i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_RPATH) - cache.nrpath++; - ptr += cmdsize; - } - uint32_t idx = 0; - ptr = (char *)header; - ptr += (magic == magic64) ? 0x20 : 0x20 - 0x4; - cache.rpaths = (char **)malloc(sizeof(char *) * cache.nrpath); - for (uint32_t i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_RPATH) { - cache.rpaths[idx++] = (char *)ptr + 12; - printf("%s\n", cache.rpaths[idx - 1]); - } - ptr += cmdsize; - } - printf("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"); -} -void fix_binds(struct libcache_item *libfixing, struct libcache *cache, - int n_ins, uint32_t *instructions, char *libs, char *symbols) { - uint32_t libsystem_hash = - calculate_libname_hash(cache, "/usr/lib/libSystem.B.dylib"); + uint32_t slide = 0; - typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int); - typedef void *(*mach_task_self_t)(); - mach_task_self_t mach_task_self_func = - (mach_task_self_t)custom_dlsym(cache, libsystem_hash, "_mach_task_self"); - vm_protect_t vm_protect_func = - (vm_protect_t)custom_dlsym(cache, libsystem_hash, "_vm_protect"); - - int npage_rw_fixed = 0; - uint64_t page_rw_fixed[10]; // should be dynamic, but works for now - - int pc = 0; - for (; pc != n_ins;) { - uint32_t libidx = instructions[pc]; - uint32_t nsym = instructions[pc + 1]; - pc += 2; - - char *lib = libs + libidx; - for (int i = 0; i < nsym; i++) { - uint32_t op = instructions[pc]; - uint32_t offset = instructions[pc + 1]; - pc += 2; - - uint32_t symidx = op >> 8; - uint32_t segment = op & 0xff; - char *sym = symbols + symidx; - - uint64_t fix_at = offset + libfixing->segment[segment]; - - // enable WRITE protection for this data segment - int need_rw_fix = true; - for (int j = 0; j < npage_rw_fixed; j++) { - if (page_rw_fixed[j] <= fix_at && page_rw_fixed[j] + 0x1000 > fix_at) { - need_rw_fix = false; - } - } - if (need_rw_fix) { - uint64_t start_page = fix_at - (fix_at % 0x1000); - vm_protect_func(mach_task_self_func(), start_page, 0x1000, 0, - VM_PROT_READ | VM_PROT_WRITE); - page_rw_fixed[npage_rw_fixed++] = start_page; - printf("modify page starts at 0x%llx to RW\n", start_page); - } - - void *resolved = 0; - // search with hash is faster - // resolved = custom_dlsym(&cache, symbol.hash, symbol.name); - if (resolved == 0) { - // but fuck apple they have relative path and rpath - resolved = custom_dlsym(cache, lib, sym); - } - *(uint64_t *)fix_at = (uint64_t)resolved; - - printf("imports need to fix: %s at 0x%llx\n", sym, fix_at); - printf(" from=%s\n", lib); - printf(" segment id=%d; offset=0x%x;", segment, offset); - printf(" resolved=%llx(%p)\n", *(uint64_t *)fix_at, resolved); - } - } -} - -void fix_objc(struct libcache_item *libfixing, struct libcache &cache); -void fix_initializer(struct libcache_item *libfixing, struct libcache &cache); -void fix(struct libcache &cache) { - // now we have function to find exported symbols - // it supports full name search or hash search - // to reserve space, we use the hash search - // - // so we will collect all imported symbols, and its offset to fix - // with legacy symbol resolve - // __got always has dyld_stub_binder - // __la_symbol_ptr - // with modern symbol resolve - // __got now contains full rebase/bind opcode - // - // the list of all imported symbols should be - // [(offset, name, libhash)] - // if we want to also fix framework/libraries used by the main executable, - // (only those that are not governed by the system) - // we should also have extra list(s) for that lib to resolve ourselves - // - // main: [(offset, name, libhash)] - // libA: [(offset, name, libhash)] - // libB: [(offset, name, libhash)] - // - // using the list is temporary for PoC - // we know that many symbols are exported from 1 lib - // so we can build a trie (yes, more trie) - // where the symbols are now concatenated with libhash 4 bytes as prefix - // and the offset is at the terminal node - // - // this way, we can reduce the libhash, although we need to build a trie - // build the trie is harder than traversing it - // - // just an idea, if we can somehow reduce the datasize then it would be better - - // OBJC: - // In Objective-C, the binary is loaded with the Objective-C runtime - // This runtime (a library) install a hook on dyld for all images - // And because this runtime is a system runtime, the bootstrap step is already - // prepared The details on this runtime will be in a seperated document, below - // are some basics - // - // The compiler for Objective-C emits a bunch of details for the runtime in - // the binary itself These information are stored in sections with prefix name - // __objc, namely - // - __objc_classlist - // - __objc_clssrefs - // - __objc_selref - // - __objc_const - // - __objc_data - // - // Objective-C stores the class interface in the binary particulary in - // __objc_data This interface contains the superclass, metaclass, and a cache - // to methods pointers These information are either bound (by dyld) or built - // (by Objective-C runtime) - // - // One of the important routine in the Objective-C runtime is readClass. - // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3385 - // - // This function is not exported, however there is an entry in the symtab. - // By using this, we can find the its address - // - // Because __objc_data contains to-be-bound values, - // which will be resolved by dyld and referenced by Objective-C runtime later - // if we simply erase this value, reference(s) read by Objective-C runtime - // ensues a crash (through debugging, we know that the crash happens in - // readClass, realizeClassWithoutSwift) - // - // However, we can evade this by making the runtime thinks there is no class - // needs setup This can be done by changing the __objc_classlist to some other - // name or remove this section Because the runtime find the __objc_classlist - // section by name, and the size of the section is used to iterate through - // pointers. So if we change the name, the runtime will have no class to run - // setup. Or complete removal and call the setup by ourselves, because we know - // where the data is - // - // The setup is done through readClass function, as said above, its address - // can be found This function is pure C function so call into this function is - // easy - // - // Important function with their names: - // _readClass(objc_class*, bool, bool) - // mangled: __ZL9readClassP10objc_classbb - // - // _realizeClassWithoutSwift(objc_class*, objc_class*) - // mangled: __ZL24realizeClassWithoutSwiftP10objc_classS0_ - // - // _remapClass(objc_class*) - // mangled: __ZL10remapClassP10objc_class - // - // _addClassTableEntry(objc_class*, bool) - // magled: __ZL18addClassTableEntryP10objc_classb - - // NOTES: - // mach_task_self() has a conflicting symbol or something, - // in symbol table it's: _mach_task_self_ - // but have to search with: _mach_task_self - // - // so future replacement into mach_task_self has to use _mach_task_self - // despite the symbol is _mach_task_self_ - // - // may need to look into why this happens so we can deal with this more - // generic - - // resolve selfbind if exist - { // stored inside __DATA,selfbind - struct libcache_item *libfixing = get_libcache_with_name(&cache, "thislib"); - struct selfbind_t { - uint32_t liblist_offset; - uint32_t symbollist_offset; - }; - struct selfbind_t *selfbind = - (struct selfbind_t *)get_selfbind(libfixing->header); - - if (selfbind) { - char *libs = (char *)(selfbind + 1) + selfbind->liblist_offset; - char *symbols = (char *)(selfbind + 1) + selfbind->symbollist_offset; - uint64_t n_instructions = ((uint64_t)libs - (uint64_t)(selfbind + 1)) / 4; - uint32_t *encoded_table = (uint32_t *)(selfbind + 1); - - printf("[*] performing selfbind (instructions=%p)\n", selfbind); - fix_binds(libfixing, &cache, n_instructions, encoded_table, libs, - symbols); - } - } - - // the rest of the fixes are in main executable - printf("[*] performing bind for main executable\n"); - struct libcache_item *libfixing = get_libcache_with_name(&cache, "main"); - fix_binds(libfixing, &cache, bshield_data::n_instructions, - bshield_data::encoded_table, bshield_data::libs, - bshield_data::symbols); - - // TODO: Reformat the region as per before, or leave as it - // for (int j = 0; j < npage_rw_fixed; j++) { - // uint64_t start_page = page_rw_fixed[j]; - // vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, - // VM_PROT_READ); - // } - - // Encrypted __TEXT segment - // char* text_start = (char*)libfixing->header + 0x3000; - // vm_protect_func(mach_task_self_func(), (uint64_t)text_start, 0x1000, 0, - // VM_PROT_READ | VM_PROT_WRITE); - // printf("text fix at %p\n", text_start + 0xb8c); - // for (int i = 0; i < 0x2ac; i++) { - // text_start[0xb8c + i] = text_start[0xb8c + i] ^ 0xcc; - // } - - fix_objc(libfixing, cache); - fix_initializer(libfixing, cache); - - // _TEXT must be RX or RW no RWX - // vm_protect_func(mach_task_self_func(), (uint64_t)text_start, 0x1000, 0, - // VM_PROT_READ | VM_PROT_EXECUTE); -} - -void volatile custom_initializer(int argc, const char *const argv[], - const char *const envp[], - const char *const apple[]) { - printf("[+] run custom initializers\n"); - - if (custom_initializer_i->cls != 0) { - // for Objective-C load - uint64_t *loadable_classes = custom_initializer_i->loadable_classes; - uint32_t *loadable_classes_used = - custom_initializer_i->loadable_classes_used; - sel_lookUpByName_t sel_lookUpByName = - custom_initializer_i->sel_lookUpByName; - objc_autoreleasePoolPop_t objc_autoreleasePoolPop = - custom_initializer_i->objc_autoreleasePoolPop; - objc_autoreleasePoolPush_t objc_autoreleasePoolPush = - custom_initializer_i->objc_autoreleasePoolPush; - remapClass_t remapClass = custom_initializer_i->remapClass; - schedule_class_load_t schedule_class_load = - custom_initializer_i->schedule_class_load; - - for (int i = 0; i < custom_initializer_i->ncls; i++) { - void *cls0 = (void *)custom_initializer_i->cls[i]; - void *cls = remapClass(cls0); - if (!cls) - continue; - schedule_class_load(cls); - } - - printf("loadable_classes %llx %x\n", *loadable_classes, - *loadable_classes_used); - - struct loadable_class_t { - void *cls; - void *method; - }; - struct loadable_class_t *classes = - (struct loadable_class_t *)*loadable_classes; - int used = *loadable_classes_used; - *loadable_classes = 0; - // *loadable_classes_allocated = 0; - *loadable_classes_used = 0; - void *sel = sel_lookUpByName("load"); - // Call all +loads for the detached list. - void *pool = objc_autoreleasePoolPush(); - for (int i = 0; i < used; i++) { - void *cls = classes[i].cls; - load_method_t load_method = (load_method_t)classes[i].method; - printf("call load of class %p %p\n", cls, load_method); - if (!cls) - continue; - (load_method)(cls, sel); - } - // Destroy the detached list. - if (classes) - free(classes); - objc_autoreleasePoolPop(pool); - } - - // for constructors - if (custom_initializer_i->constructors) { - typedef void *(*constructors_t)(int, void *, void *, void *, void *); - uint32_t nconst = custom_initializer_i->nconstructors; - for (int i = 0; i < nconst; i++) { - constructors_t cons = - (constructors_t)custom_initializer_i->constructors[i]; - printf("call initializer at %p\n", cons); - cons(argc, (void *)argv, (void *)envp, (void *)apple, - custom_initializer_i->programvars); - } - free(custom_initializer_i->constructors); - } - - printf("[+] initializers completed\n"); - free(custom_initializer_i); - end = clock(); - double cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC; - printf("restoration library time: %lf\n", cpu_time_used); -} - -void fix_objc_classdata(struct libcache_item *libfixing, struct libcache &cache); -void fix_class_refs(struct libcache_item *libfixing, struct libcache &cache); -void run_objc_readclass(struct libcache_item *libfixing, struct libcache &cache); - -// method are splited into 3 kinds, but for simplicity, we think of it as -// 2 kinds: big and small -// our example are small method list, which all pointers are relative and 32-bit -// the size should be 0xc == 12 but we have padding 4-byte 0x0 for some reason? -union _objc_method{ - struct { - const char* name; - const char* types; - void* imp; - }; - struct { - int32_t sel_offset; - int32_t typ_offset; - int32_t imp_offset; - }; -}; - -struct method_t { - const char* name; /* Pointer to name (or selector reference?) */ - const char* types; /* Pointer to type info */ - void* imp; /* Pointer to implementation (code) */ -}; - -// entsize & 0x80000000 is small method kind -// entsize = kind | sizeof(_objc_method) -struct _method_list_t { - uint32_t entsize; // sizeof(struct _objc_method) - uint32_t method_count; - union _objc_method method_list[]; -}; - -struct _class_ro_t { - uint32_t flags; - uint32_t const instanceStart; - uint32_t const instanceSize; - uint32_t const reserved; // only when building for 64bit targets - const uint8_t * const ivarLayout; - const char *const name; - struct _method_list_t * baseMethods; - const /*struct _protocol_list_t*/void *const baseProtocols; - const /*struct _ivar_list_t*/void *const ivars; - const uint8_t * const weakIvarLayout; - const /*struct _prop_list_t*/void *const properties; -}; - -struct _class_t { - struct _class_t *isa; - struct _class_t * superclass; - void *cache; - void *vtable; - struct _class_ro_t *ro; -}; -void fix_objc(struct libcache_item *libfixing, struct libcache &cache) { - printf("[+] dealing with Objective-C\n"); -#ifdef METH1 - fix_objc_classdata(libfixing, cache); -#endif -#ifdef METH3 - printf("METH3\n"); - fix_class_refs(libfixing, cache); -#endif - run_objc_readclass(libfixing, cache); -} - -void test_objc_hijack(void* self, void* selector, void* input) { - printf("[Foo tobehijacked] function is HIJACKED\n"); - printf("arg1=%p arg2=%p arg3=%p\n", self, selector, input); -} - -// a subroutine to perform hooking of fixed-binary classes -// by iterating in the __objc_classref which internally points to -// __objc_data for a list of _class_t structs -// each _classt_t has a _class_ro_t containing pointers to -// the components of an instance, including methods, properties, ivars, ... -// -// in this function, we only work on hooking/hijacking of class methods -// by fixing the method list which to be read by Objective-C runtime during readClass -// the method list is a list of {selector, type, implementation} (all pointers) -// by fixing the implementation (should point to a function) the readClass -// thinks that it is the function associated with the method name/selector -// -// by now, all rebases have been rebased and pointers should be pointing correctly -// however, selectors are to be constructed, unless erased -void fix_objc_classdata(struct libcache_item *libfixing, struct libcache &cache) { - void *header = libfixing->header; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - - uint64_t linkedit_vmaddr; - uint64_t linkedit_fileoffset; - uint64_t slide; - - uint64_t methlist_start; - uint64_t methlist_size; - - uint32_t libsystem_hash = - calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); - typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int); - typedef void *(*mach_task_self_t)(); - mach_task_self_t mach_task_self_func = - (mach_task_self_t)custom_dlsym(&cache, libsystem_hash, "_mach_task_self"); - vm_protect_t vm_protect_func = - (vm_protect_t)custom_dlsym(&cache, libsystem_hash, "_vm_protect"); + char* secrets = 0; for (int i = 0; i < ncmds; i++) { const uint32_t cmd = *((uint32_t *)ptr + 0); @@ -1409,142 +124,18 @@ void fix_objc_classdata(struct libcache_item *libfixing, struct libcache &cache) if (custom_strcmp(name, "__TEXT") == 0) { slide = (uint64_t)header - vmaddr; + } else if (custom_strcmp(name, "__BSHIELD") == 0) { + printf("found __BSHIELD segment at %p\n", ptr); + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); char *sections_ptr = (char *)((uint32_t *)ptr + 18); for (int sec = 0; sec < nsect; sec++) { char *secname = sections_ptr; - // to be able to fix method list for hooking, we need this section - // to be writable - if (custom_strncmp(secname, "__objc_methlist", 16) == 0) { + if (custom_strncmp(secname, "__secrets", 16) == 0) { uint64_t addr = *((uint64_t *)sections_ptr + 4); uint64_t size = *((uint64_t *)sections_ptr + 5); - methlist_start = addr + slide; - methlist_size = size; - - printf("setting __objc_methlist to RW: addr=%p size=%x\n", addr + slide, size); - vm_protect_func(mach_task_self_func(), methlist_start, methlist_size, 0, VM_PROT_READ | VM_PROT_WRITE); - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - - } else if (custom_strcmp(name, "__DATA") == 0) { - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - // we can iterate in the __objc_data rather than __objc_classref - // classref can also point to outside classes that are imported - if (custom_strncmp(secname, "__objc_data", 16) == 0) { - - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - struct _class_t *data_ptr = (struct _class_t *)(addr + slide); - - for (int nclass = 0; nclass < size / sizeof(struct _class_t); nclass++, data_ptr++) { - // ro can be null for some reasons - // baseMethods is null if the class is a metaclass - if (!(data_ptr->ro && data_ptr->ro->baseMethods)) { - continue; - } - const char* class_name = data_ptr->ro->name; - struct _method_list_t* methods = data_ptr->ro->baseMethods; - for (int i_method = 0; i_method < methods->method_count; i_method++) { - // have to use reference because the relative offset is calculated with the variable address - // if not using reference, then the variable will be a COPY value and the address is localized - union _objc_method* method = &methods->method_list[i_method]; - if (methods->entsize & 0x80000000) { - const char* imp = *(char**)((char*)(&method->sel_offset) + method->sel_offset); - if (custom_strcmp(class_name, "Foo") == 0 && custom_strcmp(imp, "tobehijacked:") == 0) { - // char* current_imp = (char*)(&method->imp_offset) + method->imp_offset; - - // encode the relative pointer - uint64_t replace = (uint64_t)test_objc_hijack; - uint64_t original = (uint64_t)&method->imp_offset; - printf("modify the Objective-C method at %p\n", &method->imp_offset); - if (replace > original) { - method->imp_offset = (int32_t)(replace - original); - } else { - method->imp_offset = -(int32_t)(original - replace); - } - } - - printf(" method=%p\n", method); - printf(" sel=%x --> %p\n", method->sel_offset, (char*)(&method->sel_offset) + method->sel_offset); - printf(" %s\n", name); - printf(" typ=%x --> %s\n", method->typ_offset, (char*)&method->typ_offset + method->typ_offset); - printf(" fun=%x --> %p\n", method->imp_offset, (char*)(&method->imp_offset) + method->imp_offset); - } - else { - const char* imp = method->name; - if (custom_strcmp(class_name, "Foo") == 0 && custom_strcmp(imp, "tobehijacked:") == 0) { - void* replace = (void*)test_objc_hijack; - printf("modify the Objective-C method at %p with legacy format.\n", &method->imp); - method->imp = replace; - } - printf(" method=%p\n", method); - printf(" sel=%s\n", method->name); - printf(" typ=%p\n", method->types); - printf(" fun=%p\n", method->imp); - } - } - } - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } else if (custom_strcmp(name, "__LINKEDIT") == 0) { - linkedit_vmaddr = vmaddr; - linkedit_fileoffset = fileoffset; - } - } - ptr += cmdsize; - } - - // _TEXT must be RX or RW no RWX - vm_protect_func(mach_task_self_func(), methlist_start, methlist_size, 0, - VM_PROT_READ | VM_PROT_EXECUTE); -} - -uint64_t find_replace_cls_refs(struct libcache cache) { - void *header = cache.thislib; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - uint64_t slide; - for (int i = 0; i < ncmds; i++){ - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64){ - char* name = (char*)((uint64_t*)ptr + 1); - uint64_t vmaddr = *((uint64_t*)ptr + 3); - if (custom_strcmp(name, "__TEXT") == 0) - slide = (uint64_t)header - vmaddr; - - if (custom_strcmp(name, "__DATA") == 0){ - uint64_t nsect = *((uint32_t*)ptr + 8 * 2); - char* sections_ptr = (char*)((uint32_t*)ptr + 18); - for (int sec = 0; sec < nsect; sec++){ - char* secname = sections_ptr; - if (custom_strncmp(secname, "__objc_data", 11) == 0){ - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - struct _class_t *data_ptr = (struct _class_t *)(addr + slide); - for (int nclass = 0; nclass < size / sizeof(struct _class_t); nclass++, data_ptr++) { - if (!data_ptr->ro) - continue; - if (data_ptr->ro->flags & 0x01) { continue; } - if (custom_strcmp(data_ptr->ro->name, "Hooker") == 0){ - printf("Found Hooker @ %p\n", data_ptr); - return (uint64_t)data_ptr; - } - } + secrets = (char*)(addr + slide); } sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; } @@ -1552,440 +143,17 @@ uint64_t find_replace_cls_refs(struct libcache cache) { } ptr += cmdsize; } -} - -void fix_class_refs(struct libcache_item *libfixing, struct libcache &cache) { - uint64_t replace = find_replace_cls_refs(cache); - void *header = libfixing->header; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - uint64_t slide; - for (int i = 0; i < ncmds; i++){ - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64){ - char* name = (char*)((uint64_t*)ptr + 1); - uint64_t vmaddr = *((uint64_t*)ptr + 3); - if (custom_strcmp(name, "__TEXT") == 0) - slide = (uint64_t)header - vmaddr; - - if (custom_strcmp(name, "__DATA") == 0){ - uint64_t nsect = *((uint32_t*)ptr + 8 * 2); - char* sections_ptr = (char*)((uint32_t*)ptr + 18); - for (int sec = 0; sec < nsect; sec++){ - char* secname = sections_ptr; - if (custom_strncmp(secname, "__objc_classrefs", 16) == 0){ - uint64_t addr = *((uint64_t*)sections_ptr + 4) + slide; - uint64_t size = *((uint64_t*)sections_ptr + 5); - struct _class_t* target_clsref = NULL; - for (int nclass = 0; nclass < size / sizeof(uint64_t*); nclass++){ - target_clsref = (_class_t*)(*((uint64_t *)addr + nclass)); - // printf("Target clasref @ %p: %p\n", (uint64_t *)addr + nclass, target_clsref); - if (custom_strcmp(target_clsref->ro->name, "Foo") == 0){ - // TODO - printf("Target clasref @ %p: %p\n", (uint64_t *)addr + nclass, target_clsref); - *((uint64_t *)addr + nclass) = replace; - printf("New clasref @ %p: %p\n", (uint64_t *)addr + nclass, *((uint64_t *)addr + nclass)); - struct _class_t* hooker = (struct _class_t*)replace; - printf("superclass hooker: %p\n", target_clsref->superclass); - hooker->superclass = target_clsref; - printf("New superclass hooker: %p\n", hooker->superclass); - break; - } - } - - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } - } - ptr += cmdsize; - } -} - -void run_objc_readclass(struct libcache_item *libfixing, struct libcache &cache) { - // Manually run the Objective-C runtime for each class - // - - // use the snippet bellow to call class method - // because often the this pointer is stored in a different register - // so need to load that register in before calling the function - // - // void* foo = (void*)function_to_call; - // asm("movq %0, %%r12"::"r"(foo)); - // __asm__(".intel_syntax noprefix;" - // "mov rcx, 123;" - // "call r12;"); - - printf("fixing objective-c\n"); - void *header = libfixing->header; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - - uint64_t linkedit_vmaddr; - uint64_t linkedit_fileoffset; - uint64_t slide; - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - // this assumes that __TEXT comes before __DATA_CONST - printf("segment %s\n", name); - if (custom_strcmp(name, "__TEXT") == 0) { - slide = (uint64_t)header - vmaddr; - - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - printf("section %s\n", secname); - if (custom_strncmp(secname, "__objc_methname", 16) == 0) { - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint64_t *data_ptr = (uint64_t *)(addr + slide); - // printf("methname addr %p : %s\n", data_ptr, (char*)data_ptr); - break; - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } else if (custom_strcmp(name, "__DATA") == 0) { - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - printf("section %s\n", secname); - if (custom_strncmp(secname, "__objc_selrefs", 16) == 0) { - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint64_t *data_ptr = (uint64_t *)(addr + slide); - - uint32_t trie_size; - char *symbol = (char *)"__dyld_get_objc_selector"; - void *libdyld = cache.libdyld; - void *libdyld_export_trie = get_export_trie(libdyld, trie_size); - typedef void *(*dyld_get_objc_selector_t)(const char *); - dyld_get_objc_selector_t dyld_get_objc_selector_func = - (dyld_get_objc_selector_t)find_in_export_trie( - libdyld, libdyld_export_trie, symbol); - - // resolve method names that cached in the dyld - for (int i = 0; i < bshield_data::n_selectors; i++) { - uint32_t idx = bshield_data::special_selectors_idx[i]; - const char *name = bshield_data::special_selectors_name[i]; - data_ptr[idx] = (uint64_t)dyld_get_objc_selector_func(name); - } - - typedef void *(*sel_lookUpByName_t)(const char *); - sel_lookUpByName_t sel_lookUpByName = - (sel_lookUpByName_t)custom_dlsym( - &cache, "/usr/lib/libobjc.A.dylib", "_sel_lookUpByName"); - printf("selector gogogo: %p\n", - sel_lookUpByName("dateFromString:")); - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } else if (custom_strcmp(name, "__DATA_CONST") == 0) { - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - printf("section %s\n", secname); - if (custom_strncmp(secname, "__objc_classbruh", 16) == 0) { - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint64_t *data_ptr = (uint64_t *)(addr + slide); - - readClass_t readClass = - (readClass_t)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, - "__ZL9readClassP10objc_classbb"); - realizeClassWithoutSwift_t realizeClassWithoutSwift = - (realizeClassWithoutSwift_t)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, - "__ZL24realizeClassWithoutSwiftP10objc_classS0_"); - - for (int ptr_i = 0; ptr_i < size / 8; ptr_i++) { - // this pointer is rebased by dyld and points to the correct class - // interface for some reason, we can skip this and it should still - // work - void *newCls = readClass((void *)data_ptr[ptr_i], false, false); - if (newCls != (void *)data_ptr[ptr_i]) { - realizeClassWithoutSwift(newCls, 0); - } - printf("add class init (%llx)%p\n", data_ptr[ptr_i], newCls); - } - } else if (custom_strncmp(secname, "__objc_nlclsbruh", 16) == 0) { - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint64_t *data_ptr = (uint64_t *)(addr + slide); - - uint64_t *loadable_classes = (uint64_t *)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, "__ZL16loadable_classes"); - uint32_t *loadable_classes_allocated = - (uint32_t *)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, - "__ZL26loadable_classes_allocated"); - uint32_t *loadable_classes_used = - (uint32_t *)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, - "__ZL21loadable_classes_used"); - - remapClass_t remapClass = - (remapClass_t)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, - "__ZL10remapClassP10objc_class"); - schedule_class_load_t schedule_class_load = - (schedule_class_load_t)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, - "__ZL19schedule_class_loadP10objc_class"); - realizeClassWithoutSwift_t realizeClassWithoutSwift = - (realizeClassWithoutSwift_t)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, - "__ZL24realizeClassWithoutSwiftP10objc_classS0_"); - addClassTableEntry_t addClassTableEntry = - (addClassTableEntry_t)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, - "__ZL18addClassTableEntryP10objc_classb"); - sel_lookUpByName_t sel_lookUpByName = - (sel_lookUpByName_t)find_in_symtab("/usr/lib/libobjc.A.dylib", - &cache, "_sel_lookUpByName"); - objc_autoreleasePoolPush_t objc_autoreleasePoolPush = - (objc_autoreleasePoolPush_t)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, - "__objc_autoreleasePoolPush"); - objc_autoreleasePoolPop_t objc_autoreleasePoolPop = - (objc_autoreleasePoolPop_t)find_in_symtab( - "/usr/lib/libobjc.A.dylib", &cache, - "__objc_autoreleasePoolPop"); - - // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3822 - for (int ptr_i = 0; ptr_i < size / 8; ptr_i++) { - void *cls = remapClass((void *)data_ptr[ptr_i]); - if (!cls) - continue; - addClassTableEntry(cls); - realizeClassWithoutSwift(cls, 0); - printf("build nonlazy class at (%llx)%p\n", data_ptr[ptr_i], cls); - } - - custom_initializer_i->sel_lookUpByName = sel_lookUpByName; - custom_initializer_i->loadable_classes = loadable_classes; - custom_initializer_i->loadable_classes_used = loadable_classes_used; - custom_initializer_i->objc_autoreleasePoolPush = - objc_autoreleasePoolPush; - custom_initializer_i->objc_autoreleasePoolPop = - objc_autoreleasePoolPop; - custom_initializer_i->schedule_class_load = schedule_class_load; - custom_initializer_i->remapClass = remapClass; - custom_initializer_i->cls = data_ptr; - custom_initializer_i->ncls = size / 8; - // printf("loadable_classes %llx\n", *loadable_classes); - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } else if (custom_strcmp(name, "__LINKEDIT") == 0) { - linkedit_vmaddr = vmaddr; - linkedit_fileoffset = fileoffset; - } - } - ptr += cmdsize; - } -} - -void fix_initializer(struct libcache_item *libfixing, struct libcache &cache) { - // fix the initializers - // The Objective-C runtime loads the NSObject after this lib booted - // So all calls to NSObject (and its children classes) will segfault/throw - // error - // - // So we will fix the main initializers, which runs after all Objective-C - // setup The initializers will run these Objective-C classes' load methods - // - // (THIS IDEA IS TESTED AND WILL NOT WORK) - // As of now, we assume the main executable has a __mod_init_func section - // In practice, we should always inject an empty __mod_init_func - // But if the binary already has a __mod_init_func section, this section must - // reallocate into a different section to allow for a bigger size (oldsize+1) - // - // This idea can't work because dyld check the pointer to be inside the image, - // which is not if we point it here - // - // (THIS IS THE CORRECT IDEA) - // So for Objective-C binaries, the load chain happens like this - // [(no objc?)lib init] -> [objc runtime] -> [foundations] -> - // [main obj-c load] -> [main constructor] -> [main] - // We need to inject between [objc runtime] and [main] - // - // There could be many ways to do this. I discovered 1 method of doing this. - // - // The idea is to hijack the main function to do the rest of the - // initalizations. By fixing the LC_MAIN command, we can make dyld jump to - // anywhere we want as main. But the command can't be edited at runtime. And - // pointing to the function we want needs a workaround. - // - // So we will write a shellcode in the binary and point main to that - // shellcode. The shellcode basically loads the address of the initalization - // function, call it, then call main, and return. - // - // The shellcode must be able ot get the current pc address to correctly - // calculate address from any offset. In arm64, we can use `adr x8, 0`. If we - // know where the shellcode is, we can effectively calculate the header of - // main. Now, everything is easy, just need offsets to anywhere we want and we - // can get it. - // - // Now the address of the initalization function can be fetched using many - // methods, but it resides inside this library. To reduce redundance work, we - // can write the address of this function somewhere inside main, which will - // then be easily found. - // - // As a result, we choose the space before __text to write the shellcode, - // the space after __DATA to write the address for initalization function. - // Because all segment is allocated/pre-allocated with page alignement, - // we can be pretty sure that there are free space. - // (note: __TEXT segment is aligned to the end of the page, free space in the - // middle) - // - // The shellcode is built using the ios-wrapper tool - // The idea is: - // - // push main arguments - // r8 = shellcode location - // r9 = offset from shellcode to __DATA end - // r8 = r8 + r9 -- get __DATA end address - // r9 = *r8 -- the first pointer is custom_initializer - // call r9 - // r9 = *(r8 + 4) -- the second pointer is main function - // pop main arguments - // jump r9 -- do not call, return to dyld - - void *header = libfixing->header; - const uint32_t magic = *(uint32_t *)header; - char *ptr = (char *)header; - if (magic == magic64) { - ptr += 0x20; - } else { - ptr += 0x20 - 0x4; - } - - const uint32_t ncmds = *((uint32_t *)header + 4); - char *command_ptr = ptr; - - uint64_t linkedit_vmaddr; - uint64_t linkedit_fileoffset; - uint64_t slide; - for (int i = 0; i < ncmds; i++) { - const uint32_t cmd = *((uint32_t *)ptr + 0); - const uint32_t cmdsize = *((uint32_t *)ptr + 1); - if (cmd == LC_SEGMENT_64) { - char *name = (char *)((uint64_t *)ptr + 1); - uint64_t vmaddr = *((uint64_t *)ptr + 3); - uint64_t fileoffset = *((uint64_t *)ptr + 5); - // this assumes that __TEXT comes before __DATA_CONST - printf("segment %s\n", name); - if (custom_strcmp(name, "__TEXT") == 0) { - slide = (uint64_t)header - vmaddr; - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - for (int sec = 0; sec < nsect; sec++) { - char *secname = sections_ptr; - printf("section %s\n", secname); - if (custom_strcmp(secname, "__init_offsets") == 0) { - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - uint32_t *data_ptr = (uint32_t *)(addr + slide); - - printf("found initializer at %p\n", data_ptr); - - custom_initializer_i->nconstructors = size / 4; - custom_initializer_i->constructors = - (uint64_t *)malloc(sizeof(uint64_t) * size / 4); - for (int j = 0; j < size / 4; j++) { - custom_initializer_i->constructors[j] = - (uint64_t)header + data_ptr[j]; - printf("registered initializer at %llx\n", - custom_initializer_i->constructors[j]); - } - } - if (custom_strcmp(secname, "__mod_init_func") == 0) { - // TODO: EHEHE - printf("initializer encoded in __mod_init_func is not supported\n"); - } - sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; - } - } else if (custom_strcmp(name, "__DATA") == 0) { - uint64_t nsect = *((uint32_t *)ptr + 8 * 2); - char *sections_ptr = (char *)((uint32_t *)ptr + 18); - sections_ptr += (16 * 2 + 8 * 2 + 4 * 8) * (nsect - 1); - - uint64_t addr = *((uint64_t *)sections_ptr + 4); - uint64_t size = *((uint64_t *)sections_ptr + 5); - - uint64_t *dummy = (uint64_t *)(addr + slide + size); - dummy[0] = (uint64_t)custom_initializer; - dummy[1] = (uint64_t)(header) + bshield_data::main; - printf("-- add custom main-peg at %p\n", dummy); - printf("-- custom initializer at %llx\n", dummy[0]); - printf("-- main function at %llx\n", dummy[1]); - } else if (custom_strcmp(name, "__LINKEDIT") == 0) { - linkedit_vmaddr = vmaddr; - linkedit_fileoffset = fileoffset; - } - } - ptr += cmdsize; - } -} - -void test(struct libcache &cache) { - uint32_t libsystem_hash = - calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); - if (false) { // test search using name - void *printf_func = - custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_printf"); - printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); - - void *vm_protect_func = - custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_vm_protect"); - printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, - vm_protect); - - // using relative path - void *func_c_1 = - custom_dlsym(&cache, "./out/libb.dylib", "__Z15exported_from_cv"); - printf("Indirect search: Found=%p Expected=%p\n", func_c_1, - exported_from_c); - - // using rpath - void *func_c_2 = - custom_dlsym(&cache, "@rpath/libb.dylib", "__Z15exported_from_cv"); - printf("Indirect search: Found=%p Expected=%p\n", func_c_2, - exported_from_c); - } - - if (false) { // test search using hash of name - void *printf_func = custom_dlsym(&cache, libsystem_hash, "_printf"); - printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); - - void *vm_protect_func = custom_dlsym(&cache, libsystem_hash, "_vm_protect"); - printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, - vm_protect); - - void *realpath_func = - custom_dlsym(&cache, libsystem_hash, "_realpath$DARWIN_EXTSN"); - printf("Indirect search: Found=%p Expected=%p\n", realpath_func, realpath); - } + + secrets[0] = 'F'; + secrets[1] = 'R'; + secrets[2] = 'E'; + secrets[3] = 'E'; + secrets[4] = ' '; + secrets[5] = 'S'; + secrets[6] = 'P'; + secrets[7] = 'A'; + secrets[8] = 'C'; + secrets[9] = 'E'; + secrets[10] = '\n'; + secrets[11] = 0; }