From 925429c4a9aa17b0ba6a1576a0b4be4b7b19eb93 Mon Sep 17 00:00:00 2001 From: nganhkhoa Date: Thu, 22 Aug 2024 17:40:39 +0700 Subject: [PATCH] boilerplate code to research string removal --- research/strings_empty/.gitignore | 1 + research/strings_empty/build.sh | 75 + research/strings_empty/restore.cc | 1991 +++++++++++++++++ research/strings_empty/tests/c_code.c | 6 + research/strings_empty/tests/objc_code.m | 8 + research/strings_empty/tests/swift_code.swift | 1 + 6 files changed, 2082 insertions(+) create mode 100644 research/strings_empty/.gitignore create mode 100755 research/strings_empty/build.sh create mode 100644 research/strings_empty/restore.cc create mode 100644 research/strings_empty/tests/c_code.c create mode 100644 research/strings_empty/tests/objc_code.m create mode 100644 research/strings_empty/tests/swift_code.swift diff --git a/research/strings_empty/.gitignore b/research/strings_empty/.gitignore new file mode 100644 index 0000000..d42ab35 --- /dev/null +++ b/research/strings_empty/.gitignore @@ -0,0 +1 @@ +out/* diff --git a/research/strings_empty/build.sh b/research/strings_empty/build.sh new file mode 100755 index 0000000..a7fd74b --- /dev/null +++ b/research/strings_empty/build.sh @@ -0,0 +1,75 @@ +set -e +clear +VERSION=${1:-14} +OUT=./out +LOGIC=1 +make -C ../../macho-go +mkdir -p $OUT + +echo "using mach-o version $VERSION" +if [[ $VERSION -ge 14 ]] +then + echo "Resulting binary uses MODERN symbol resolver" +else + echo "Resulting binary uses LEGACY symbol resolver" +fi + +cat <<'fly' + ______ + _\ _~-\___ + = = ==(____AA____D + \_____\___________________,-~~~~~~~`-.._ + / o O o o o o O O o o o o o o O o |\_ + `~-.__ ___..----.. 
) + `---~~\___________/------------````` + = ===(_________D +fly + +# this is a joke for those who knows +# https://www.blackhat.com/presentations/bh-dc-09/Iozzo/BlackHat-DC-09-Iozzo-let-your-mach0-fly-whitepaper.pdf +echo "make your Mach-O fly" + +if [[ $LOGIC -eq 0 ]] +then + +clang-format -i -style=llvm *.cc + +elif [[ $LOGIC -eq 1 ]] +then + +# build test binaries +clang -mmacosx-version-min=$VERSION -o $OUT/c_code tests/c_code.c +clang -fobjc-arc -ObjC -mmacosx-version-min=$VERSION -o $OUT/objc_code tests/objc_code.m +swiftc -o $OUT/swift_code tests/swift_code.swift + +# c program +../../macho-go/bin/ios-wrapper pepe -o $OUT/c_code_fixed -b $OUT/c_code.bcell -l $OUT/librestore_c.dylib --remove-strings $OUT/c_code +../../macho-go/bin/ios-wrapper bcell2header -b $OUT/c_code.bcell -o $OUT/restore.h +clang++ -mmacosx-version-min=$VERSION -o $OUT/librestore_c.dylib -shared -Wl,-reexport_library restore.cc + +# objc program +# ../../macho-go/bin/ios-wrapper pepe -o $OUT/objc_code_fixed -b $OUT/objc_code.bcell -l $OUT/librestore_objc.dylib --remove-strings $OUT/objc_code +# ../../macho-go/bin/ios-wrapper bcell2header -b $OUT/c_code.bcell -o $OUT/restore.h +# clang++ -mmacosx-version-min=$VERSION -o $OUT/librestore_objc.dylib -shared -Wl,-reexport_library restore.cc + +# swift program +# ../../macho-go/bin/ios-wrapper pepe -o $OUT/swift_code_fixed -b $OUT/swift_code.bcell -l $OUT/librestore_swift.dylib --remove-strings $OUT/swift_code +# ../../macho-go/bin/ios-wrapper bcell2header -b $OUT/c_code.bcell -o $OUT/restore.h +# clang++ -mmacosx-version-min=$VERSION -o $OUT/librestore_swift.dylib -shared -Wl,-reexport_library restore.cc + +# executable +chmod +x $OUT/c_code_fixed +# chmod +x $OUT/objc_code_fixed +# chmod +x $OUT/swift_code_fixed + +# resign +codesign --force --deep -s - $OUT/c_code_fixed +# codesign --force --deep -s - $OUT/objc_code_fixed +# codesign --force --deep -s - $OUT/swift_code_fixed + +# run +$OUT/c_code_fixed +# $OUT/objc_code_fixed +# 
$OUT/swift_code_fixed + +fi diff --git a/research/strings_empty/restore.cc b/research/strings_empty/restore.cc new file mode 100644 index 0000000..49702eb --- /dev/null +++ b/research/strings_empty/restore.cc @@ -0,0 +1,1991 @@ +#include +#include +#include +#include +#include +#include + +// #include +#include + +#include "out/restore.h" + +char *pwd; +uint32_t pwd_len; +clock_t start, end; +#define ISARM(header) ((*((uint32_t *)(header)+1) & 0xff) == 0xc) + +int custom_strcmp(const char *p1, const char *p2) { + const unsigned char *s1 = (const unsigned char *)p1; + const unsigned char *s2 = (const unsigned char *)p2; + unsigned char c1, c2; + do { + c1 = (unsigned char)*s1++; + c2 = (unsigned char)*s2++; + if (c1 == '\0') + return c1 - c2; + } while (c1 == c2); + return c1 - c2; +} + +int custom_strncmp(const char *s1, const char *s2, register size_t n) { + register unsigned char u1, u2; + + while (n-- > 0) { + u1 = (unsigned char)*s1++; + u2 = (unsigned char)*s2++; + if (u1 != u2) + return u1 - u2; + if (u1 == '\0') + return 0; + } + return 0; +} + +void set_cwd(const char *const *envp) { + while (*envp) { + // PWD= + if (0x3d445750 == *(uint32_t *)(*envp)) { + break; + } + envp++; + } + pwd = (char *)(*envp + 4); + for (; pwd[pwd_len] != 0; pwd_len++) + ; +} + +const uint32_t magic64 = 0xfeedfacf; +const uint32_t magic32 = 0xfeedface; + +struct libcache_item { + void *header; + void *trie; + uint32_t trie_size; + uint32_t hash; + + uint64_t slide; + + // pointer to segment address + uint32_t nsegment; + uint64_t *segment; +}; + +struct libcache { + struct libcache_item *libs; + uint32_t size; + + void *main; + void *thislib; + void *libdyld; + + int nrpath; + char **rpaths; +}; + +uint32_t fnv_hash_extend(const char *str, uint32_t h) { + unsigned char *s = (unsigned char *)str; /* unsigned string */ + + /* See the FNV parameters at www.isthe.com/chongo/tech/comp/fnv/#FNV-param */ + const uint32_t FNV_32_PRIME = 0x01000193; /* 16777619 */ + + // uint32_t h = 
0x811c9dc5; /* 2166136261 */x + while (*s != 0) { + /* xor the bottom with the current octet */ + h ^= *s++; + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + h *= FNV_32_PRIME; + } + + return h; +} +uint32_t fnv_hash(const char *str) { return fnv_hash_extend(str, 0x811c9dc5); } + +// try these hashes +// https://gist.github.com/sgsfak/9ba382a0049f6ee885f68621ae86079b + +// calculate the hash to search +// _dyld_get_image_name returns the full path to the library +// while the static path in LC_DYLIB (and such) could be relative +// we should expand the path to fullpath to correctly compute the hash +// +// the hardest part is the @rpath, because there can be many LC_RPATH +// and @rpath can also reference @loader_path +uint32_t calculate_libname_hash(const libcache *cache, const char *name) { + uint32_t hash; + uint32_t (*hash_func)(const char *) = fnv_hash; + if (name[0] == '.') { + // resolve relative path with ./ ../ ../../ and so on + char *p = realpath(name, 0); + hash = hash_func(p); + free(p); + } else if (name[0] == '@') { + // TODO: resolve @rpath + // ohyeah this is gonna be wild + // loop through all rpath and resolve that rpath + // then resolve the full path for all rpath + // + // which rpath is correct can be done by checking if the cache has that hash + for (int i = 0; i < cache->nrpath; i++) { + char *rpath = cache->rpaths[i]; + char *p = realpath(rpath, 0); + hash = hash_func(p); + hash = fnv_hash_extend(&name[6], hash); + for (size_t j = 0; j < cache->size; j++) { + if (cache->libs[j].hash == hash) { + free(p); + return hash; + } + } + free(p); + } + // printf("resolver for @rpath is not supported yet\n"); + } else { + hash = hash_func(name); + } + return hash; +} + +// dummy no sus function to look for dyld header +// i don't know if dyld_stub_binder should be better +// because if they are not familiar with dyld +// they would not suspect dyld_stub_binder inside modern macho +// Added iOS 6, macOS 10.8 +extern "C" uint32_t 
dyld_get_sdk_version(const mach_header *mh); +void exported_from_c(); + +void decode_uleb128(char *&addr, uint32_t *ret) { + uint32_t result = 0; + int shift = 0; + + while (1) { + unsigned char byte = *(unsigned char *)(addr); + addr++; + + result |= (byte & 0x7f) << shift; + shift += 7; + + if (!(byte & 0x80)) + break; + } + + *ret = result; +} + +void *find_header(void *_func) { + // Approach 1: (not stable) + // we assume that text section is small enough to fit on 1 page + // so the header should stay at the top of the page due to allocation logic + // the slice/slide is random but always align 0x1000 so we test a few values + // to see if the magic value is found + // + // Guaranteed to stop, but search range is small + + // const uint64_t page_size = 0x4000; + // uint64_t func = (uint64_t)_func; + // uint64_t potential_head = func + (0x4000 - (func % page_size)); + // void* head = 0; + // for (uint64_t i = 0x1000; i < 0xf000; i+=0x1000) { + // uint32_t* x = (uint32_t*)(potential_head - i); + // if (*x == magic64 || *x == magic32) { + // head = (void*)x; + // break; + // } + // } + // return head; + + // Approach 2: (more stable) + // We know that the header is 0x1000 aligned, + // just loop until the magic value is found + // Using while loop so ¯\_(ツ)_/¯ + const uint64_t page_size = 0x1000; + uint64_t func = (uint64_t)_func; + uint64_t potential_head = func + (0x1000 - (func % page_size)); + + void *head = 0; + uint32_t *x = (uint32_t *)(potential_head); + while (*x != magic64 && *x != magic32) { + x -= 0x1000 / 4; + } + return (void *)x; +} + +uint64_t get_slide(const void *header) { + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + uint64_t slice = 0; + const uint32_t ncmds = *((uint32_t *)header + 4); + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == 
LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + if (custom_strcmp(name, "__TEXT") == 0) { + slice = (uint64_t)header - vmaddr; + return slice; + } + } + ptr += cmdsize; + } + return 0; +} + +void *get_selfbind(const void *header) { + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + + uint64_t slide; + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + // this assumes that __TEXT comes before __DATA_CONST + if (custom_strcmp(name, "__TEXT") == 0) { + slide = (uint64_t)header - vmaddr; + } else if (custom_strcmp(name, "__DATA") == 0) { + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + sections_ptr += (16 * 2 + 8 * 2 + 4 * 8) * (nsect - 1); + + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + if (custom_strcmp(secname, "selfbind") == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint32_t *data_ptr = (uint32_t *)(addr + slide); + return (void *)data_ptr; + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } + } + ptr += cmdsize; + } + return 0; +} + +void print_macho_summary(const void *header) { + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + uint64_t linkedit_vmaddr; + uint64_t linkedit_fileoffset; + uint64_t slide; + printf("parsing 
macho at %p\n", header); + printf("ncmds %x\n", ncmds); + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + printf(" cmd %x %x\n", cmd, cmdsize); + if (cmd == LC_DYLD_EXPORTS_TRIE) { + const uint32_t offset = *((uint32_t *)ptr + 2); + const uint32_t size = *((uint32_t *)ptr + 3); + printf(" export trie: offset=0x%x size=0x%x\n", offset, size); + } + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t vmsize = *((uint64_t *)ptr + 4); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + uint64_t filesize = *((uint64_t *)ptr + 6); + if (custom_strcmp(name, "__TEXT") == 0) { + slide = (uint64_t)header - vmaddr; + printf(" --- slide=0x%llx ---\n", slide); + } else if (custom_strcmp(name, "__LINKEDIT") == 0) { + linkedit_vmaddr = vmaddr; + linkedit_fileoffset = fileoffset; + } + printf(" Segment %s\n", name); + printf(" vmaddr=0x%llx fileoffset=0x%llx\n", vmaddr, fileoffset); + printf(" vmsize=0x%llx filesize=0x%llx\n", vmsize, filesize); + + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint32_t fileoffset = *((uint32_t *)sections_ptr + 6 * 2); + printf(" Section %s\n", sections_ptr); + printf(" addr=0x%llx size=0x%llx fileoffset=0x%x\n", addr, size, + fileoffset); + } + } + if (cmd == LC_SYMTAB) { + uint32_t symoff = *((uint32_t *)ptr + 2); + uint32_t nsym = *((uint32_t *)ptr + 3); + uint32_t stroff = (*((uint32_t *)ptr + 4)); + uint32_t strsize = *((uint32_t *)ptr + 5); + + struct symbol_t { + uint32_t strx; + uint8_t flags; + uint8_t sect; + uint16_t desc; + uint64_t value; + }; + + uint64_t symtab_start = + (uint64_t)symoff - linkedit_fileoffset + slide + linkedit_vmaddr; + uint64_t 
stroff_start = + (uint64_t)stroff - linkedit_fileoffset + slide + linkedit_vmaddr; + + printf(" symtab: offset=0x%x nsym=0x%x\n", symoff, nsym); + for (int j = 0; j < nsym; j++) { + struct symbol_t *symtab = (struct symbol_t *)symtab_start; + struct symbol_t symbol = symtab[j]; + char *name = (char *)stroff_start + symbol.strx; + printf(" %s %llx => %p\n", name, symbol.value, + (void *)(symbol.value + slide)); + } + } + if (cmd == LC_REEXPORT_DYLIB) { + uint32_t name_offset = *((uint32_t *)ptr + 2); + char *name = (char *)ptr + name_offset; + printf(" reexport lib %s\n", name); + } + ptr += cmdsize; + } +} + +void *get_export_trie(const void *header, uint32_t &size) { + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + uint64_t slice = 0; + uint64_t linkedit_vmaddr = 0; + uint64_t linkedit_fileoffset = 0; + const uint32_t ncmds = *((uint32_t *)header + 4); + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_DYLD_EXPORTS_TRIE) { + const uint32_t offset = *((uint32_t *)ptr + 2); + size = *((uint32_t *)ptr + 3); + uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset; + return (void *)(linkedit_vmaddr + slice + offset_in_linkedit); + } + if (cmd == LC_DYLD_INFO_ONLY) { + const uint32_t offset = *((uint32_t *)ptr + 10); + size = *((uint32_t *)ptr + 11); + uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset; + return (void *)(linkedit_vmaddr + slice + offset_in_linkedit); + } + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + if (custom_strcmp(name, "__TEXT") == 0) { + slice = (uint64_t)header - vmaddr; + } else if (custom_strcmp(name, "__LINKEDIT") == 0) { + linkedit_vmaddr = vmaddr; + linkedit_fileoffset = fileoffset; + 
} + } + ptr += cmdsize; + } + return 0; +} + +uint32_t should_follow_symbol(char *&buffer, char *&_find) { + // printf("follow check %s has prefix: %s\n", _find, buffer); + char *find = _find; + char is_prefix = true; + while (1) { + int find_end = *find == 0; + int buffer_end = *buffer == 0; + int check = *buffer == *find; + // printf("check is %x == %x\n", *buffer, *find); + + if (buffer_end) { + // we must always run to the end of buffer, marked 0x00 + buffer++; + break; + } + if (find_end) { + // symbol to find is shorter than current buffer string + // but we still need to run to the end of buffer + // so just set not prefix + is_prefix = false; + } + if (!check) { + is_prefix = false; + } + buffer++; + find++; + } + // only move forward if is_prefix + if (is_prefix) { + _find = find; + // printf("prefix is found\n"); + } + return is_prefix; +} + +void *find_in_export_trie(const void *header, void *trie, char *&symbol) { + uint32_t func = 0; + + char *ptr = (char *)trie; + char *find = (char *)symbol; + while (1) { + // terminal node will have data + uint32_t data_count = 0; + decode_uleb128(ptr, &data_count); + if (data_count != 0 && *find == 0) { + // printf("reached terminal node\n"); + break; + } else if (data_count) { + // still need to follow the branch + ptr += data_count; + } + char num_child = ptr[0]; + ptr++; + + int still_following = 0; + for (char i = 0; i < num_child; i++) { + still_following = should_follow_symbol(ptr, find); + uint32_t follow_offset; + decode_uleb128(ptr, &follow_offset); + if (still_following) { + ptr = (char *)trie + follow_offset; + break; + } + } + + if (!still_following) { + // symbol not found + return 0; + } + } + + char count = *(ptr - 1); + uint8_t flag = *ptr++; // flags + // uleb128 offset + decode_uleb128(ptr, &func); + + if (flag == 0x8 /*re-export*/) { + // this hits a re-export symbol but with another name + // usually, the re-export is the same name on another library + // but somehow, for system libraries, a lot 
of symbols are + // renamed and re-exported from another library + // probably this was to build wrappers and + // have custom platform optimizations + // + // example of these is _strlen in libsystem_c.dylib + // is re-exported from __platform_strlen in libplatform + // + // The purpose of using char*& is to change the symbol searching + // to another symbols and do it quickly using references (pointer) + // + // we return 0 so the dlsym continues to search, + // but with another symbol name because the symbol points to another string + symbol = ptr; + return 0; + } + return (void *)((char *)header + func); +} + +void *find_in_lib(struct libcache *cache, struct libcache_item *lib, + char *&symbol); + +void *find_in_reexport(struct libcache *cache, struct libcache_item *lib, + char *&symbol) { + void *header = lib->header; + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd != LC_REEXPORT_DYLIB) { + ptr += cmdsize; + continue; + } + uint32_t name_offset = *((uint32_t *)ptr + 2); + char *name = (char *)ptr + name_offset; + uint32_t hash = calculate_libname_hash(cache, name); + for (int j = 0; j < cache->size; j++) { + struct libcache_item *reexport = &cache->libs[j]; + if (reexport->hash != hash) { + continue; + } + void *found = find_in_lib(cache, reexport, symbol); + if (found) + return found; + } + ptr += cmdsize; + } + return 0; +} + +void *find_in_lib(struct libcache *cache, struct libcache_item *lib, + char *&symbol) { + void *direct = find_in_export_trie(lib->header, lib->trie, symbol); + if (direct) { + return direct; + } + // cannot find in directly exported trie, loop through all reexport libs + return find_in_reexport(cache, lib, symbol); +} + +// the current 
logic of dlsym is not correct, but it works for PoC +// +// dlsym searchs and match libraries based on the LC_DYLD_ID load command +// while for our PoC, we use the paths of libraries to search for them +// +// for performance reasons, we do not compare the paths as strings +// we instead use a simple hash to carry out comparision +// using hashes allows us to compare integers and would be faster +void *custom_dlsym(struct libcache *cache, uint32_t hash, const char *symbol) { + for (size_t i = 0; i < cache->size; i++) { + struct libcache_item *cache_lib = &cache->libs[i]; + if (cache_lib->hash == hash) { + // read find_in_export_trie comments to know the use of char*& + // + // this code is for when the symbol searching references + // a previous item in search chain + // + // For example: + // searching for X in [A, B, C], + // C has X but it is a re-export from B with the name Y + // then we have to perform a search again from the top + // but with symbol Y + char **symbol_copy = (char **)&symbol; + void *func = find_in_lib(cache, cache_lib, *symbol_copy); + if (*symbol_copy != symbol) { + func = find_in_lib(cache, cache_lib, *symbol_copy); + } + return func; + } + } + return 0; +} + +void *custom_dlsym(struct libcache *cache, const char *libname, + const char *symbol) { + uint32_t hash = calculate_libname_hash(cache, libname); + return custom_dlsym(cache, hash, symbol); +} + +void bootstrap_libcache_item(struct libcache_item *item, const void *header, + const char *name) { + item->header = (void *)header; + item->trie = get_export_trie(header, item->trie_size); + + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + char 
*name = (char *)((uint64_t *)ptr + 1); + if (custom_strcmp(name, "__TEXT") == 0) { + uint64_t vmaddr = *((uint64_t *)ptr + 3); + item->slide = (uint64_t)header - vmaddr; + } + item->nsegment++; + } + ptr += cmdsize; + } + + ptr = command_ptr; + item->segment = (uint64_t *)malloc(sizeof(uint64_t) * item->nsegment); + for (int i = 0, segment_i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + uint64_t vmaddr = *((uint64_t *)ptr + 3); + item->segment[segment_i++] = (vmaddr + item->slide); + } + ptr += cmdsize; + } + return; +} + +struct libcache_item *get_libcache_with_name(struct libcache *cache, + const char *name) { + void *to_find = 0; + if (custom_strcmp(name, "main") == 0) { + to_find = cache->main; + } else if (custom_strcmp(name, "thislib") == 0) { + to_find = cache->thislib; + } + uint32_t hash = calculate_libname_hash(cache, name); + for (int i = 0; i < cache->size; i++) { + struct libcache_item *cache_lib = &cache->libs[i]; + // search by hash or by pointer for special case + if (cache_lib->hash == hash || cache_lib->header == to_find) { + return cache_lib; + } + } + return 0; +} + +void dump_export_trie(const void *trie, uint32_t size, const char *filename) { + FILE *outfile = fopen(filename, "wb"); + fwrite((char *)trie, size, 1, outfile); + fclose(outfile); +} + +void dump_export_trie_of(const char *libname, const libcache *cache, + const char *filename) { + uint32_t hash = calculate_libname_hash(cache, libname); + for (int i = 0; i < cache->size; i++) { + struct libcache_item cache_lib = cache->libs[i]; + if (cache_lib.hash == hash) { + return dump_export_trie(cache_lib.trie, cache_lib.trie_size, filename); + } + } +} + +void *find_in_symtab(const libcache_item *lib, const char *find) { + void *header = lib->header; + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 
0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + + uint64_t linkedit_vmaddr; + uint64_t linkedit_fileoffset; + uint64_t slide; + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SYMTAB) { + uint32_t symoff = *((uint32_t *)ptr + 2); + uint32_t nsym = *((uint32_t *)ptr + 3); + uint32_t stroff = (*((uint32_t *)ptr + 4)); + uint32_t strsize = *((uint32_t *)ptr + 5); + + struct symbol_t { + uint32_t strx; + uint8_t flags; + uint8_t sect; + uint16_t desc; + uint64_t value; + }; + + uint64_t symtab_start = + (uint64_t)symoff - linkedit_fileoffset + slide + linkedit_vmaddr; + uint64_t stroff_start = + (uint64_t)stroff - linkedit_fileoffset + slide + linkedit_vmaddr; + + for (int j = 0; j < nsym; j++) { + struct symbol_t *symtab = (struct symbol_t *)symtab_start; + struct symbol_t symbol = symtab[j]; + char *name = (char *)stroff_start + symbol.strx; + if (custom_strcmp(name, find) == 0) { + return (void *)(symbol.value + slide); + } + } + break; + } + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + if (custom_strcmp(name, "__TEXT") == 0) { + slide = (uint64_t)header - vmaddr; + } else if (custom_strcmp(name, "__LINKEDIT") == 0) { + linkedit_vmaddr = vmaddr; + linkedit_fileoffset = fileoffset; + } + } + ptr += cmdsize; + } + return 0; +} + +void *find_in_symtab(const char *libname, const libcache *cache, + const char *find) { + uint32_t hash = calculate_libname_hash(cache, libname); + struct libcache_item *cache_lib = 0; + for (int i = 0; i < cache->size; i++) { + if (cache->libs[i].hash == hash) { + cache_lib = &(cache->libs[i]); + break; + } + } + return find_in_symtab(cache_lib, find); +} +int hook_printf(const char *format, ...) 
{ + va_list args; + va_start(args, format); + + printf("HOOKED BEGIN LOL\n"); + int status = printf(format, args); + printf("HOOKED END LOL\n"); + + va_end(args); + return status; +} + +typedef void *(*readClass_t)(void *, bool, bool); +typedef void *(*realizeClassWithoutSwift_t)(void *, void *); +typedef void *(*remapClass_t)(void *); +typedef void *(*load_method_t)(void *, void *); +typedef void *(*sel_lookUpByName_t)(const char *); +typedef void (*addClassTableEntry_t)(void *); +typedef void (*schedule_class_load_t)(void *); + +typedef void *(*objc_autoreleasePoolPush_t)(); +typedef void (*objc_autoreleasePoolPop_t)(void *); + +struct custom_initializer_t { + // used for Objective-C load methods + uint64_t *loadable_classes; + uint32_t *loadable_classes_used; + sel_lookUpByName_t sel_lookUpByName; + objc_autoreleasePoolPush_t objc_autoreleasePoolPush; + objc_autoreleasePoolPop_t objc_autoreleasePoolPop; + remapClass_t remapClass; + schedule_class_load_t schedule_class_load; + uint64_t *cls; + size_t ncls; + // used for constructors + void *programvars; + uint64_t *constructors; + size_t nconstructors; +}; + +// global variable for PoC +struct custom_initializer_t *custom_initializer_i; + +struct ProgramVars { + void *mh; // mach_header or mach_header64 + int *NXArgcPtr; + const char ***NXArgvPtr; + const char ***environPtr; + const char **__prognamePtr; +}; + +void build_cache(struct libcache &cache, void *main); +void fix(struct libcache &cache); +void find_all_rpath(struct libcache &cache, void *main); + +void test(struct libcache &cache); + +void restore_strings(void); + +__attribute__((constructor)) static void +bruh(int argc, const char *const argv[], const char *const envp[], + const char *const apple[], const struct ProgramVars *vars) { + start = clock(); + printf("=== rebuilding the strings ===\n"); + + restore_strings(); +} + +/// strings in __TEXT,__cstring has been removed and this +/// function tries to recover those strings. 
/// It uses one of the two methods below.
///
/// 1. Recover __TEXT,__cstring
/// 2. Build a new segment with a section for strings
///
/// (1) might seem reasonable at first, but it requires the __TEXT
/// segment to be writable. Although we can arrange that, we are
/// not sure whether the modification is allowed by Apple.
///
/// (2) actually requires a little more work: we define a new
/// segment with a section inside. This segment is mounted
/// readable/writable. Not only that, all string references
/// must also be updated.
/// In ARMv8 code, every `adrp` + `add` sequence referencing a
/// string must be updated with new parameters, because the
/// address/offset has changed.
/// In ARMv8 (AArch64), every instruction is 4 bytes, so looping
/// through all the code and changing the instructions is easy.
///
/// It can be seen that opting for method (2) is safer,
/// as Apple allows arbitrary segments. This option requires
/// that there is enough space left for a new segment.
/// By calculation, it should be around 152 bytes.
///
///   4 + 4 + 16 + 8*4 + 4*4 + 16 + 16 + 8*2 + 4*8
///   ^~~~^   ^~~~~~~~~~~~~^   ^~~~~~~~~~~~~~~~~~^
///     1           2                   3
///
/// 1: load command header
/// 2: segment data
/// 3: section
///
/// However, if we can expand the old section and remove the
/// old section entry, then we only need 72 bytes, because
/// we only move the section entry.
///
/// Remember to update the command count in the Mach-O header (+1).
+void restore_strings(void) { +} + +void build_cache(struct libcache &cache, void *main) { + const uint64_t main_slide = get_slide(main); + // Find our lib (mapped) file + const void *thislib = find_header((void *)bruh); + // Find dyld lib (mapped) file using a no-sus function + const void *libdyld = find_header((void *)dyld_get_sdk_version); + + cache.main = (void *)main; + cache.thislib = (void *)thislib; + cache.libdyld = (void *)libdyld; + uint32_t libsystem_hash = + calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); + + // From libdyld header, we can list exports table + // to find all function we want to use + // + // This way there is no leakage of functions we use to do our trick + // mostly to hide + // - _dyld_image_count + // - _dyld_get_image_name + // - _dyld_get_image_header + // - _dyld_get_image_vmaddr_slide + + // The above functions are crucial to find all libraries loaded + // From which we will traverse the exports table to replace + // _got and _la_symbol_pointer data + + // Our lib can hide more details too + // We can resolve all functions we use + // before resolving the main executable imports + // + // This will make our lib use only dyld_get_sdk_version + // For the main executable, imports are empty due to manual resolve + + printf("executable header at %p\n", main); + printf("lib header at %p\n", thislib); + printf("libdyld header at %p\n", libdyld); + + find_all_rpath(cache, main); + uint32_t trie_size; + void *libdyld_export_trie = get_export_trie(libdyld, trie_size); + + // we have to traverse the trie to find these symbols + // because if we self-rebuild import table for **this** lib, + // these symbols aren't resolved + // so we have to resolve ourselves and then rebuild the symbols for others + typedef int (*dyld_image_count_t)(void); + typedef char *(*dyld_get_image_name_t)(int); + typedef void *(*dyld_get_image_header_t)(int); + + char *dyld_image_count_s = (char *)"__dyld_image_count"; + int 
(*dyld_image_count_func)(void) = (dyld_image_count_t)find_in_export_trie( + libdyld, libdyld_export_trie, dyld_image_count_s); + + char *dyld_get_image_header_s = (char *)"__dyld_get_image_header"; + void *(*dyld_get_image_header_func)(int) = + (dyld_get_image_header_t)find_in_export_trie(libdyld, libdyld_export_trie, + dyld_get_image_header_s); + + char *dyld_get_image_name_s = (char *)"__dyld_get_image_name"; + char *(*dyld_get_image_name_func)(int) = + (dyld_get_image_name_t)find_in_export_trie(libdyld, libdyld_export_trie, + dyld_get_image_name_s); + + cache.size = dyld_image_count_func(); + cache.libs = + (struct libcache_item *)malloc(sizeof(struct libcache_item) * cache.size); + for (int i = 0; i < cache.size; i++) { + void *header = dyld_get_image_header_func(i); + char *name = dyld_get_image_name_func(i); + bootstrap_libcache_item(&cache.libs[i], header, name); + cache.libs[i].hash = calculate_libname_hash(&cache, name); + // printf("%p %s\n", header, name); + } +} + +// Function to find all rpath entries of the main executable +void find_all_rpath(struct libcache &cache, void *header) { + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + printf("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"); + const uint32_t ncmds = *((uint32_t *)header + 4); + printf("RPATHS:\n"); + cache.nrpath = 0; + for (uint32_t i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_RPATH) + cache.nrpath++; + ptr += cmdsize; + } + uint32_t idx = 0; + ptr = (char *)header; + ptr += (magic == magic64) ? 
0x20 : 0x20 - 0x4; + cache.rpaths = (char **)malloc(sizeof(char *) * cache.nrpath); + for (uint32_t i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_RPATH) { + cache.rpaths[idx++] = (char *)ptr + 12; + printf("%s\n", cache.rpaths[idx - 1]); + } + ptr += cmdsize; + } + printf("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"); +} + +void fix_binds(struct libcache_item *libfixing, struct libcache *cache, + int n_ins, uint32_t *instructions, char *libs, char *symbols) { + uint32_t libsystem_hash = + calculate_libname_hash(cache, "/usr/lib/libSystem.B.dylib"); + + typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int); + typedef void *(*mach_task_self_t)(); + mach_task_self_t mach_task_self_func = + (mach_task_self_t)custom_dlsym(cache, libsystem_hash, "_mach_task_self"); + vm_protect_t vm_protect_func = + (vm_protect_t)custom_dlsym(cache, libsystem_hash, "_vm_protect"); + + int npage_rw_fixed = 0; + uint64_t page_rw_fixed[10]; // should be dynamic, but works for now + + int pc = 0; + for (; pc != n_ins;) { + uint32_t libidx = instructions[pc]; + uint32_t nsym = instructions[pc + 1]; + pc += 2; + + char *lib = libs + libidx; + for (int i = 0; i < nsym; i++) { + uint32_t op = instructions[pc]; + uint32_t offset = instructions[pc + 1]; + pc += 2; + + uint32_t symidx = op >> 8; + uint32_t segment = op & 0xff; + char *sym = symbols + symidx; + + uint64_t fix_at = offset + libfixing->segment[segment]; + + // enable WRITE protection for this data segment + int need_rw_fix = true; + for (int j = 0; j < npage_rw_fixed; j++) { + if (page_rw_fixed[j] <= fix_at && page_rw_fixed[j] + 0x1000 > fix_at) { + need_rw_fix = false; + } + } + if (need_rw_fix) { + uint64_t start_page = fix_at - (fix_at % 0x1000); + vm_protect_func(mach_task_self_func(), start_page, 0x1000, 0, + VM_PROT_READ | VM_PROT_WRITE); + page_rw_fixed[npage_rw_fixed++] = start_page; + printf("modify page starts at 0x%llx 
to RW\n", start_page); + } + + void *resolved = 0; + // search with hash is faster + // resolved = custom_dlsym(&cache, symbol.hash, symbol.name); + if (resolved == 0) { + // but fuck apple they have relative path and rpath + resolved = custom_dlsym(cache, lib, sym); + } + *(uint64_t *)fix_at = (uint64_t)resolved; + + printf("imports need to fix: %s at 0x%llx\n", sym, fix_at); + printf(" from=%s\n", lib); + printf(" segment id=%d; offset=0x%x;", segment, offset); + printf(" resolved=%llx(%p)\n", *(uint64_t *)fix_at, resolved); + } + } +} + +void fix_objc(struct libcache_item *libfixing, struct libcache &cache); +void fix_initializer(struct libcache_item *libfixing, struct libcache &cache); +void fix(struct libcache &cache) { + // now we have function to find exported symbols + // it supports full name search or hash search + // to reserve space, we use the hash search + // + // so we will collect all imported symbols, and its offset to fix + // with legacy symbol resolve + // __got always has dyld_stub_binder + // __la_symbol_ptr + // with modern symbol resolve + // __got now contains full rebase/bind opcode + // + // the list of all imported symbols should be + // [(offset, name, libhash)] + // if we want to also fix framework/libraries used by the main executable, + // (only those that are not governed by the system) + // we should also have extra list(s) for that lib to resolve ourselves + // + // main: [(offset, name, libhash)] + // libA: [(offset, name, libhash)] + // libB: [(offset, name, libhash)] + // + // using the list is temporary for PoC + // we know that many symbols are exported from 1 lib + // so we can build a trie (yes, more trie) + // where the symbols are now concatenated with libhash 4 bytes as prefix + // and the offset is at the terminal node + // + // this way, we can reduce the libhash, although we need to build a trie + // build the trie is harder than traversing it + // + // just an idea, if we can somehow reduce the datasize then it would 
be better + + // OBJC: + // In Objective-C, the binary is loaded with the Objective-C runtime + // This runtime (a library) installs a hook on dyld for all images + // And because this runtime is a system runtime, the bootstrap step is already + // prepared. The details on this runtime will be in a separate document; below + // are some basics + // + // The compiler for Objective-C emits a bunch of details for the runtime in + // the binary itself. This information is stored in sections with prefix name + // __objc, namely + // - __objc_classlist + // - __objc_classrefs + // - __objc_selrefs + // - __objc_const + // - __objc_data + // + // Objective-C stores the class interface in the binary, particularly in + // __objc_data. This interface contains the superclass, metaclass, and a cache + // of method pointers. This information is either bound (by dyld) or built + // (by Objective-C runtime) + // + // One of the important routines in the Objective-C runtime is readClass. + // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3385 + // + // This function is not exported, however there is an entry in the symtab. + // By using this, we can find its address + // + // Because __objc_data contains to-be-bound values, + // which will be resolved by dyld and referenced by Objective-C runtime later + // if we simply erase this value, reference(s) read by Objective-C runtime + // will cause a crash (through debugging, we know that the crash happens in + // readClass, realizeClassWithoutSwift) + // + // However, we can evade this by making the runtime think there is no class + // that needs setup. This can be done by changing the __objc_classlist to some other + // name or removing this section, because the runtime finds the __objc_classlist + // section by name, and the size of the section is used to iterate through + // pointers. So if we change the name, the runtime will have no class to run + // setup.
Or complete removal and call the setup by ourselves, because we know + // where the data is + // + // The setup is done through readClass function, as said above, its address + // can be found This function is pure C function so call into this function is + // easy + // + // Important function with their names: + // _readClass(objc_class*, bool, bool) + // mangled: __ZL9readClassP10objc_classbb + // + // _realizeClassWithoutSwift(objc_class*, objc_class*) + // mangled: __ZL24realizeClassWithoutSwiftP10objc_classS0_ + // + // _remapClass(objc_class*) + // mangled: __ZL10remapClassP10objc_class + // + // _addClassTableEntry(objc_class*, bool) + // magled: __ZL18addClassTableEntryP10objc_classb + + // NOTES: + // mach_task_self() has a conflicting symbol or something, + // in symbol table it's: _mach_task_self_ + // but have to search with: _mach_task_self + // + // so future replacement into mach_task_self has to use _mach_task_self + // despite the symbol is _mach_task_self_ + // + // may need to look into why this happens so we can deal with this more + // generic + + // resolve selfbind if exist + { // stored inside __DATA,selfbind + struct libcache_item *libfixing = get_libcache_with_name(&cache, "thislib"); + struct selfbind_t { + uint32_t liblist_offset; + uint32_t symbollist_offset; + }; + struct selfbind_t *selfbind = + (struct selfbind_t *)get_selfbind(libfixing->header); + + if (selfbind) { + char *libs = (char *)(selfbind + 1) + selfbind->liblist_offset; + char *symbols = (char *)(selfbind + 1) + selfbind->symbollist_offset; + uint64_t n_instructions = ((uint64_t)libs - (uint64_t)(selfbind + 1)) / 4; + uint32_t *encoded_table = (uint32_t *)(selfbind + 1); + + printf("[*] performing selfbind (instructions=%p)\n", selfbind); + fix_binds(libfixing, &cache, n_instructions, encoded_table, libs, + symbols); + } + } + + // the rest of the fixes are in main executable + printf("[*] performing bind for main executable\n"); + struct libcache_item *libfixing = 
get_libcache_with_name(&cache, "main"); + fix_binds(libfixing, &cache, bshield_data::n_instructions, + bshield_data::encoded_table, bshield_data::libs, + bshield_data::symbols); + + // TODO: Reformat the region as per before, or leave as it + // for (int j = 0; j < npage_rw_fixed; j++) { + // uint64_t start_page = page_rw_fixed[j]; + // vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, + // VM_PROT_READ); + // } + + // Encrypted __TEXT segment + // char* text_start = (char*)libfixing->header + 0x3000; + // vm_protect_func(mach_task_self_func(), (uint64_t)text_start, 0x1000, 0, + // VM_PROT_READ | VM_PROT_WRITE); + // printf("text fix at %p\n", text_start + 0xb8c); + // for (int i = 0; i < 0x2ac; i++) { + // text_start[0xb8c + i] = text_start[0xb8c + i] ^ 0xcc; + // } + + fix_objc(libfixing, cache); + fix_initializer(libfixing, cache); + + // _TEXT must be RX or RW no RWX + // vm_protect_func(mach_task_self_func(), (uint64_t)text_start, 0x1000, 0, + // VM_PROT_READ | VM_PROT_EXECUTE); +} + +void volatile custom_initializer(int argc, const char *const argv[], + const char *const envp[], + const char *const apple[]) { + printf("[+] run custom initializers\n"); + + if (custom_initializer_i->cls != 0) { + // for Objective-C load + uint64_t *loadable_classes = custom_initializer_i->loadable_classes; + uint32_t *loadable_classes_used = + custom_initializer_i->loadable_classes_used; + sel_lookUpByName_t sel_lookUpByName = + custom_initializer_i->sel_lookUpByName; + objc_autoreleasePoolPop_t objc_autoreleasePoolPop = + custom_initializer_i->objc_autoreleasePoolPop; + objc_autoreleasePoolPush_t objc_autoreleasePoolPush = + custom_initializer_i->objc_autoreleasePoolPush; + remapClass_t remapClass = custom_initializer_i->remapClass; + schedule_class_load_t schedule_class_load = + custom_initializer_i->schedule_class_load; + + for (int i = 0; i < custom_initializer_i->ncls; i++) { + void *cls0 = (void *)custom_initializer_i->cls[i]; + void *cls = remapClass(cls0); + 
if (!cls) + continue; + schedule_class_load(cls); + } + + printf("loadable_classes %llx %x\n", *loadable_classes, + *loadable_classes_used); + + struct loadable_class_t { + void *cls; + void *method; + }; + struct loadable_class_t *classes = + (struct loadable_class_t *)*loadable_classes; + int used = *loadable_classes_used; + *loadable_classes = 0; + // *loadable_classes_allocated = 0; + *loadable_classes_used = 0; + void *sel = sel_lookUpByName("load"); + // Call all +loads for the detached list. + void *pool = objc_autoreleasePoolPush(); + for (int i = 0; i < used; i++) { + void *cls = classes[i].cls; + load_method_t load_method = (load_method_t)classes[i].method; + printf("call load of class %p %p\n", cls, load_method); + if (!cls) + continue; + (load_method)(cls, sel); + } + // Destroy the detached list. + if (classes) + free(classes); + objc_autoreleasePoolPop(pool); + } + + // for constructors + if (custom_initializer_i->constructors) { + typedef void *(*constructors_t)(int, void *, void *, void *, void *); + uint32_t nconst = custom_initializer_i->nconstructors; + for (int i = 0; i < nconst; i++) { + constructors_t cons = + (constructors_t)custom_initializer_i->constructors[i]; + printf("call initializer at %p\n", cons); + cons(argc, (void *)argv, (void *)envp, (void *)apple, + custom_initializer_i->programvars); + } + free(custom_initializer_i->constructors); + } + + printf("[+] initializers completed\n"); + free(custom_initializer_i); + end = clock(); + double cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC; + printf("restoration library time: %lf\n", cpu_time_used); +} + +void fix_objc_classdata(struct libcache_item *libfixing, struct libcache &cache); +void fix_class_refs(struct libcache_item *libfixing, struct libcache &cache); +void run_objc_readclass(struct libcache_item *libfixing, struct libcache &cache); + +// method are splited into 3 kinds, but for simplicity, we think of it as +// 2 kinds: big and small +// our example are small method 
list, which all pointers are relative and 32-bit +// the size should be 0xc == 12 but we have padding 4-byte 0x0 for some reason? +// NOTE(review): the pointer form below is three pointers wide, so on 64-bit targets the union (and each method_list[] element) is larger than the 12-byte relative entry - confirm the element stride against entsize +union _objc_method{ + // pointer form: absolute pointers to the selector name, the type string and the implementation + struct { + const char* name; + const char* types; + void* imp; + }; + // relative form: signed 32-bit offsets; fix_objc_classdata adds each offset to the address of the field that holds it + struct { + int32_t sel_offset; + int32_t typ_offset; + int32_t imp_offset; + }; +}; + +// Same three fields as the pointer form of union _objc_method, as a standalone struct +struct method_t { + const char* name; /* Pointer to name (or selector reference?) */ + const char* types; /* Pointer to type info */ + void* imp; /* Pointer to implementation (code) */ +}; + +// Header of a method list; method_count entries follow in method_list +// entsize & 0x80000000 is small method kind +// entsize = kind | sizeof(_objc_method) +struct _method_list_t { + uint32_t entsize; // sizeof(struct _objc_method) + uint32_t method_count; + union _objc_method method_list[]; +}; + +// Read-only class description reached through _class_t::ro; the code in this file reads flags, name and baseMethods +struct _class_ro_t { + uint32_t flags; + uint32_t const instanceStart; + uint32_t const instanceSize; + uint32_t const reserved; // only when building for 64bit targets + const uint8_t * const ivarLayout; + const char *const name; + struct _method_list_t * baseMethods; + const /*struct _protocol_list_t*/void *const baseProtocols; + const /*struct _ivar_list_t*/void *const ivars; + const uint8_t * const weakIvarLayout; + const /*struct _prop_list_t*/void *const properties; +}; + +// Class record; fix_objc_classdata walks the __objc_data section as an array of these +struct _class_t { + struct _class_t *isa; + struct _class_t * superclass; + void *cache; + void *vtable; + struct _class_ro_t *ro; +}; +// Runs the Objective-C fixes for libfixing: fix_objc_classdata when METH1 is defined, fix_class_refs when METH3 is defined, then always run_objc_readclass +void fix_objc(struct libcache_item *libfixing, struct libcache &cache) { + printf("[+] dealing with Objective-C\n"); +#ifdef METH1 + fix_objc_classdata(libfixing, cache); +#endif +#ifdef METH3 + printf("METH3\n"); + fix_class_refs(libfixing, cache); +#endif + run_objc_readclass(libfixing, cache); +} + +// Replacement method body that fix_objc_classdata installs for selector "tobehijacked:" of class "Foo"; it only prints its three arguments +void test_objc_hijack(void* self, void* selector, void* input) { + printf("[Foo tobehijacked] function is HIJACKED\n"); + printf("arg1=%p arg2=%p arg3=%p\n", self, selector, input); +} + +// a subroutine to perform hooking of fixed-binary classes +// by iterating in the __objc_classref which internally points to +// __objc_data for a list
of _class_t structs +// each _classt_t has a _class_ro_t containing pointers to +// the components of an instance, including methods, properties, ivars, ... +// +// in this function, we only work on hooking/hijacking of class methods +// by fixing the method list which to be read by Objective-C runtime during readClass +// the method list is a list of {selector, type, implementation} (all pointers) +// by fixing the implementation (should point to a function) the readClass +// thinks that it is the function associated with the method name/selector +// +// by now, all rebases have been rebased and pointers should be pointing correctly +// however, selectors are to be constructed, unless erased +void fix_objc_classdata(struct libcache_item *libfixing, struct libcache &cache) { + void *header = libfixing->header; + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + + uint64_t linkedit_vmaddr; + uint64_t linkedit_fileoffset; + uint64_t slide; + + uint64_t methlist_start; + uint64_t methlist_size; + + uint32_t libsystem_hash = + calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); + typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int); + typedef void *(*mach_task_self_t)(); + mach_task_self_t mach_task_self_func = + (mach_task_self_t)custom_dlsym(&cache, libsystem_hash, "_mach_task_self"); + vm_protect_t vm_protect_func = + (vm_protect_t)custom_dlsym(&cache, libsystem_hash, "_vm_protect"); + + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + // this assumes that __TEXT comes before __DATA_CONST + if 
(custom_strcmp(name, "__TEXT") == 0) { + slide = (uint64_t)header - vmaddr; + + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + // to be able to fix method list for hooking, we need this section + // to be writable + if (custom_strncmp(secname, "__objc_methlist", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + + methlist_start = addr + slide; + methlist_size = size; + + printf("setting __objc_methlist to RW: addr=%p size=%x\n", addr + slide, size); + vm_protect_func(mach_task_self_func(), methlist_start, methlist_size, 0, VM_PROT_READ | VM_PROT_WRITE); + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + + } else if (custom_strcmp(name, "__DATA") == 0) { + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + // we can iterate in the __objc_data rather than __objc_classref + // classref can also point to outside classes that are imported + if (custom_strncmp(secname, "__objc_data", 16) == 0) { + + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + struct _class_t *data_ptr = (struct _class_t *)(addr + slide); + + for (int nclass = 0; nclass < size / sizeof(struct _class_t); nclass++, data_ptr++) { + // ro can be null for some reasons + // baseMethods is null if the class is a metaclass + if (!(data_ptr->ro && data_ptr->ro->baseMethods)) { + continue; + } + const char* class_name = data_ptr->ro->name; + struct _method_list_t* methods = data_ptr->ro->baseMethods; + for (int i_method = 0; i_method < methods->method_count; i_method++) { + // have to use reference because the relative offset is calculated with the variable address + // if not using reference, then the variable will be a COPY value and the 
address is localized + union _objc_method* method = &methods->method_list[i_method]; + if (methods->entsize & 0x80000000) { + const char* imp = *(char**)((char*)(&method->sel_offset) + method->sel_offset); + if (custom_strcmp(class_name, "Foo") == 0 && custom_strcmp(imp, "tobehijacked:") == 0) { + // char* current_imp = (char*)(&method->imp_offset) + method->imp_offset; + + // encode the relative pointer + uint64_t replace = (uint64_t)test_objc_hijack; + uint64_t original = (uint64_t)&method->imp_offset; + printf("modify the Objective-C method at %p\n", &method->imp_offset); + if (replace > original) { + method->imp_offset = (int32_t)(replace - original); + } else { + method->imp_offset = -(int32_t)(original - replace); + } + } + + printf(" method=%p\n", method); + printf(" sel=%x --> %p\n", method->sel_offset, (char*)(&method->sel_offset) + method->sel_offset); + printf(" %s\n", name); + printf(" typ=%x --> %s\n", method->typ_offset, (char*)&method->typ_offset + method->typ_offset); + printf(" fun=%x --> %p\n", method->imp_offset, (char*)(&method->imp_offset) + method->imp_offset); + } + else { + const char* imp = method->name; + if (custom_strcmp(class_name, "Foo") == 0 && custom_strcmp(imp, "tobehijacked:") == 0) { + void* replace = (void*)test_objc_hijack; + printf("modify the Objective-C method at %p with legacy format.\n", &method->imp); + method->imp = replace; + } + printf(" method=%p\n", method); + printf(" sel=%s\n", method->name); + printf(" typ=%p\n", method->types); + printf(" fun=%p\n", method->imp); + } + } + } + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } else if (custom_strcmp(name, "__LINKEDIT") == 0) { + linkedit_vmaddr = vmaddr; + linkedit_fileoffset = fileoffset; + } + } + ptr += cmdsize; + } + + // _TEXT must be RX or RW no RWX + vm_protect_func(mach_task_self_func(), methlist_start, methlist_size, 0, + VM_PROT_READ | VM_PROT_EXECUTE); +} + +uint64_t find_replace_cls_refs(struct libcache cache) { + void *header = cache.thislib; + const 
uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + uint64_t slide; + for (int i = 0; i < ncmds; i++){ + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64){ + char* name = (char*)((uint64_t*)ptr + 1); + uint64_t vmaddr = *((uint64_t*)ptr + 3); + if (custom_strcmp(name, "__TEXT") == 0) + slide = (uint64_t)header - vmaddr; + + if (custom_strcmp(name, "__DATA") == 0){ + uint64_t nsect = *((uint32_t*)ptr + 8 * 2); + char* sections_ptr = (char*)((uint32_t*)ptr + 18); + for (int sec = 0; sec < nsect; sec++){ + char* secname = sections_ptr; + if (custom_strncmp(secname, "__objc_data", 11) == 0){ + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + struct _class_t *data_ptr = (struct _class_t *)(addr + slide); + for (int nclass = 0; nclass < size / sizeof(struct _class_t); nclass++, data_ptr++) { + if (!data_ptr->ro) + continue; + if (data_ptr->ro->flags & 0x01) { continue; } + if (custom_strcmp(data_ptr->ro->name, "Hooker") == 0){ + printf("Found Hooker @ %p\n", data_ptr); + return (uint64_t)data_ptr; + } + } + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } + } + ptr += cmdsize; + } +} + +void fix_class_refs(struct libcache_item *libfixing, struct libcache &cache) { + uint64_t replace = find_replace_cls_refs(cache); + void *header = libfixing->header; + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + uint64_t slide; + for (int i = 0; i < ncmds; i++){ + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64){ + char* name = 
(char*)((uint64_t*)ptr + 1); + uint64_t vmaddr = *((uint64_t*)ptr + 3); + if (custom_strcmp(name, "__TEXT") == 0) + slide = (uint64_t)header - vmaddr; + + if (custom_strcmp(name, "__DATA") == 0){ + uint64_t nsect = *((uint32_t*)ptr + 8 * 2); + char* sections_ptr = (char*)((uint32_t*)ptr + 18); + for (int sec = 0; sec < nsect; sec++){ + char* secname = sections_ptr; + if (custom_strncmp(secname, "__objc_classrefs", 16) == 0){ + uint64_t addr = *((uint64_t*)sections_ptr + 4) + slide; + uint64_t size = *((uint64_t*)sections_ptr + 5); + struct _class_t* target_clsref = NULL; + for (int nclass = 0; nclass < size / sizeof(uint64_t*); nclass++){ + target_clsref = (_class_t*)(*((uint64_t *)addr + nclass)); + // printf("Target clasref @ %p: %p\n", (uint64_t *)addr + nclass, target_clsref); + if (custom_strcmp(target_clsref->ro->name, "Foo") == 0){ + // TODO + printf("Target clasref @ %p: %p\n", (uint64_t *)addr + nclass, target_clsref); + *((uint64_t *)addr + nclass) = replace; + printf("New clasref @ %p: %p\n", (uint64_t *)addr + nclass, *((uint64_t *)addr + nclass)); + struct _class_t* hooker = (struct _class_t*)replace; + printf("superclass hooker: %p\n", target_clsref->superclass); + hooker->superclass = target_clsref; + printf("New superclass hooker: %p\n", hooker->superclass); + break; + } + } + + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } + } + ptr += cmdsize; + } +} + +void run_objc_readclass(struct libcache_item *libfixing, struct libcache &cache) { + // Manually run the Objective-C runtime for each class + // + + // use the snippet bellow to call class method + // because often the this pointer is stored in a different register + // so need to load that register in before calling the function + // + // void* foo = (void*)function_to_call; + // asm("movq %0, %%r12"::"r"(foo)); + // __asm__(".intel_syntax noprefix;" + // "mov rcx, 123;" + // "call r12;"); + + printf("fixing objective-c\n"); + void *header = libfixing->header; + const uint32_t magic = 
*(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + + uint64_t linkedit_vmaddr; + uint64_t linkedit_fileoffset; + uint64_t slide; + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + // this assumes that __TEXT comes before __DATA_CONST + printf("segment %s\n", name); + if (custom_strcmp(name, "__TEXT") == 0) { + slide = (uint64_t)header - vmaddr; + + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + printf("section %s\n", secname); + if (custom_strncmp(secname, "__objc_methname", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint64_t *data_ptr = (uint64_t *)(addr + slide); + // printf("methname addr %p : %s\n", data_ptr, (char*)data_ptr); + break; + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } else if (custom_strcmp(name, "__DATA") == 0) { + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + printf("section %s\n", secname); + if (custom_strncmp(secname, "__objc_selrefs", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint64_t *data_ptr = (uint64_t *)(addr + slide); + + uint32_t trie_size; + char *symbol = (char *)"__dyld_get_objc_selector"; + void *libdyld = cache.libdyld; + void *libdyld_export_trie = get_export_trie(libdyld, trie_size); + typedef void 
*(*dyld_get_objc_selector_t)(const char *); + dyld_get_objc_selector_t dyld_get_objc_selector_func = + (dyld_get_objc_selector_t)find_in_export_trie( + libdyld, libdyld_export_trie, symbol); + + // resolve method names that cached in the dyld + for (int i = 0; i < bshield_data::n_selectors; i++) { + uint32_t idx = bshield_data::special_selectors_idx[i]; + const char *name = bshield_data::special_selectors_name[i]; + data_ptr[idx] = (uint64_t)dyld_get_objc_selector_func(name); + } + + typedef void *(*sel_lookUpByName_t)(const char *); + sel_lookUpByName_t sel_lookUpByName = + (sel_lookUpByName_t)custom_dlsym( + &cache, "/usr/lib/libobjc.A.dylib", "_sel_lookUpByName"); + printf("selector gogogo: %p\n", + sel_lookUpByName("dateFromString:")); + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } else if (custom_strcmp(name, "__DATA_CONST") == 0) { + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + printf("section %s\n", secname); + if (custom_strncmp(secname, "__objc_classbruh", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint64_t *data_ptr = (uint64_t *)(addr + slide); + + readClass_t readClass = + (readClass_t)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, + "__ZL9readClassP10objc_classbb"); + realizeClassWithoutSwift_t realizeClassWithoutSwift = + (realizeClassWithoutSwift_t)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, + "__ZL24realizeClassWithoutSwiftP10objc_classS0_"); + + for (int ptr_i = 0; ptr_i < size / 8; ptr_i++) { + // this pointer is rebased by dyld and points to the correct class + // interface for some reason, we can skip this and it should still + // work + void *newCls = readClass((void *)data_ptr[ptr_i], false, false); + if (newCls != (void *)data_ptr[ptr_i]) { + realizeClassWithoutSwift(newCls, 0); + } + printf("add class init (%llx)%p\n", 
data_ptr[ptr_i], newCls); + } + } else if (custom_strncmp(secname, "__objc_nlclsbruh", 16) == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint64_t *data_ptr = (uint64_t *)(addr + slide); + + uint64_t *loadable_classes = (uint64_t *)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, "__ZL16loadable_classes"); + uint32_t *loadable_classes_allocated = + (uint32_t *)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, + "__ZL26loadable_classes_allocated"); + uint32_t *loadable_classes_used = + (uint32_t *)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, + "__ZL21loadable_classes_used"); + + remapClass_t remapClass = + (remapClass_t)find_in_symtab("/usr/lib/libobjc.A.dylib", &cache, + "__ZL10remapClassP10objc_class"); + schedule_class_load_t schedule_class_load = + (schedule_class_load_t)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, + "__ZL19schedule_class_loadP10objc_class"); + realizeClassWithoutSwift_t realizeClassWithoutSwift = + (realizeClassWithoutSwift_t)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, + "__ZL24realizeClassWithoutSwiftP10objc_classS0_"); + addClassTableEntry_t addClassTableEntry = + (addClassTableEntry_t)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, + "__ZL18addClassTableEntryP10objc_classb"); + sel_lookUpByName_t sel_lookUpByName = + (sel_lookUpByName_t)find_in_symtab("/usr/lib/libobjc.A.dylib", + &cache, "_sel_lookUpByName"); + objc_autoreleasePoolPush_t objc_autoreleasePoolPush = + (objc_autoreleasePoolPush_t)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, + "__objc_autoreleasePoolPush"); + objc_autoreleasePoolPop_t objc_autoreleasePoolPop = + (objc_autoreleasePoolPop_t)find_in_symtab( + "/usr/lib/libobjc.A.dylib", &cache, + "__objc_autoreleasePoolPop"); + + // https://github.com/apple-oss-distributions/objc4/blob/689525d556eb3dee1ffb700423bccf5ecc501dbf/runtime/objc-runtime-new.mm#L3822 + for (int ptr_i = 0; ptr_i < size / 8; ptr_i++) { + void *cls 
= remapClass((void *)data_ptr[ptr_i]); + if (!cls) + continue; + addClassTableEntry(cls); + realizeClassWithoutSwift(cls, 0); + printf("build nonlazy class at (%llx)%p\n", data_ptr[ptr_i], cls); + } + + custom_initializer_i->sel_lookUpByName = sel_lookUpByName; + custom_initializer_i->loadable_classes = loadable_classes; + custom_initializer_i->loadable_classes_used = loadable_classes_used; + custom_initializer_i->objc_autoreleasePoolPush = + objc_autoreleasePoolPush; + custom_initializer_i->objc_autoreleasePoolPop = + objc_autoreleasePoolPop; + custom_initializer_i->schedule_class_load = schedule_class_load; + custom_initializer_i->remapClass = remapClass; + custom_initializer_i->cls = data_ptr; + custom_initializer_i->ncls = size / 8; + // printf("loadable_classes %llx\n", *loadable_classes); + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } else if (custom_strcmp(name, "__LINKEDIT") == 0) { + linkedit_vmaddr = vmaddr; + linkedit_fileoffset = fileoffset; + } + } + ptr += cmdsize; + } +} + +void fix_initializer(struct libcache_item *libfixing, struct libcache &cache) { + // fix the initializers + // The Objective-C runtime loads the NSObject after this lib booted + // So all calls to NSObject (and its children classes) will segfault/throw + // error + // + // So we will fix the main initializers, which runs after all Objective-C + // setup The initializers will run these Objective-C classes' load methods + // + // (THIS IDEA IS TESTED AND WILL NOT WORK) + // As of now, we assume the main executable has a __mod_init_func section + // In practice, we should always inject an empty __mod_init_func + // But if the binary already has a __mod_init_func section, this section must + // reallocate into a different section to allow for a bigger size (oldsize+1) + // + // This idea can't work because dyld check the pointer to be inside the image, + // which is not if we point it here + // + // (THIS IS THE CORRECT IDEA) + // So for Objective-C binaries, the load chain 
happens like this + // [(no objc?)lib init] -> [objc runtime] -> [foundations] -> + // [main obj-c load] -> [main constructor] -> [main] + // We need to inject between [objc runtime] and [main] + // + // There could be many ways to do this. I discovered 1 method of doing this. + // + // The idea is to hijack the main function to do the rest of the + // initializations. By fixing the LC_MAIN command, we can make dyld jump to + // anywhere we want as main. But the command can't be edited at runtime. And + // pointing to the function we want needs a workaround. + // + // So we will write a shellcode in the binary and point main to that + // shellcode. The shellcode basically loads the address of the initialization + // function, calls it, then calls main, and returns. + // + // The shellcode must be able to get the current pc address to correctly + // calculate address from any offset. In arm64, we can use `adr x8, 0`. If we + // know where the shellcode is, we can effectively calculate the header of + // main. Now, everything is easy, just need offsets to anywhere we want and we + // can get it. + // + // Now the address of the initialization function can be fetched using many + // methods, but it resides inside this library. To reduce redundant work, we + // can write the address of this function somewhere inside main, which will + // then be easily found. + // + // As a result, we choose the space before __text to write the shellcode, + // the space after __DATA to write the address for initialization function. + // Because all segments are allocated/pre-allocated with page alignment, + // we can be pretty sure that there is free space.
+ // (note: __TEXT segment is aligned to the end of the page, free space in the + // middle) + // + // The shellcode is built using the ios-wrapper tool + // The idea is: + // + // push main arguments + // r8 = shellcode location + // r9 = offset from shellcode to __DATA end + // r8 = r8 + r9 -- get __DATA end address + // r9 = *r8 -- the first pointer is custom_initializer + // call r9 + // r9 = *(r8 + 4) -- the second pointer is main function + // pop main arguments + // jump r9 -- do not call, return to dyld + + void *header = libfixing->header; + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char *command_ptr = ptr; + + uint64_t linkedit_vmaddr; + uint64_t linkedit_fileoffset; + uint64_t slide; + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + uint64_t vmaddr = *((uint64_t *)ptr + 3); + uint64_t fileoffset = *((uint64_t *)ptr + 5); + // this assumes that __TEXT comes before __DATA_CONST + printf("segment %s\n", name); + if (custom_strcmp(name, "__TEXT") == 0) { + slide = (uint64_t)header - vmaddr; + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char *secname = sections_ptr; + printf("section %s\n", secname); + if (custom_strcmp(secname, "__init_offsets") == 0) { + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + uint32_t *data_ptr = (uint32_t *)(addr + slide); + + printf("found initializer at %p\n", data_ptr); + + custom_initializer_i->nconstructors = size / 4; + custom_initializer_i->constructors = + (uint64_t *)malloc(sizeof(uint64_t) * size / 4); + for (int j = 0; j < size / 4; j++) { + 
custom_initializer_i->constructors[j] = + (uint64_t)header + data_ptr[j]; + printf("registered initializer at %llx\n", + custom_initializer_i->constructors[j]); + } + } + if (custom_strcmp(secname, "__mod_init_func") == 0) { + // TODO: EHEHE + printf("initializer encoded in __mod_init_func is not supported\n"); + } + sections_ptr += 16 * 2 + 8 * 2 + 4 * 8; + } + } else if (custom_strcmp(name, "__DATA") == 0) { + uint64_t nsect = *((uint32_t *)ptr + 8 * 2); + char *sections_ptr = (char *)((uint32_t *)ptr + 18); + sections_ptr += (16 * 2 + 8 * 2 + 4 * 8) * (nsect - 1); + + uint64_t addr = *((uint64_t *)sections_ptr + 4); + uint64_t size = *((uint64_t *)sections_ptr + 5); + + uint64_t *dummy = (uint64_t *)(addr + slide + size); + dummy[0] = (uint64_t)custom_initializer; + dummy[1] = (uint64_t)(header) + bshield_data::main; + printf("-- add custom main-peg at %p\n", dummy); + printf("-- custom initializer at %llx\n", dummy[0]); + printf("-- main function at %llx\n", dummy[1]); + } else if (custom_strcmp(name, "__LINKEDIT") == 0) { + linkedit_vmaddr = vmaddr; + linkedit_fileoffset = fileoffset; + } + } + ptr += cmdsize; + } +} + +void test(struct libcache &cache) { + uint32_t libsystem_hash = + calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); + if (false) { // test search using name + void *printf_func = + custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_printf"); + printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); + + void *vm_protect_func = + custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_vm_protect"); + printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, + vm_protect); + + // using relative path + void *func_c_1 = + custom_dlsym(&cache, "./out/libb.dylib", "__Z15exported_from_cv"); + printf("Indirect search: Found=%p Expected=%p\n", func_c_1, + exported_from_c); + + // using rpath + void *func_c_2 = + custom_dlsym(&cache, "@rpath/libb.dylib", "__Z15exported_from_cv"); + printf("Indirect search: Found=%p 
Expected=%p\n", func_c_2, + exported_from_c); + } + + if (false) { // test search using hash of name + void *printf_func = custom_dlsym(&cache, libsystem_hash, "_printf"); + printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); + + void *vm_protect_func = custom_dlsym(&cache, libsystem_hash, "_vm_protect"); + printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, + vm_protect); + + void *realpath_func = + custom_dlsym(&cache, libsystem_hash, "_realpath$DARWIN_EXTSN"); + printf("Indirect search: Found=%p Expected=%p\n", realpath_func, realpath); + } +} diff --git a/research/strings_empty/tests/c_code.c b/research/strings_empty/tests/c_code.c new file mode 100644 index 0000000..0923337 --- /dev/null +++ b/research/strings_empty/tests/c_code.c @@ -0,0 +1,6 @@ +#include <stdio.h> + +int main() { + printf("Hello, World!\n"); + return 0; +} diff --git a/research/strings_empty/tests/objc_code.m b/research/strings_empty/tests/objc_code.m new file mode 100644 index 0000000..9383daa --- /dev/null +++ b/research/strings_empty/tests/objc_code.m @@ -0,0 +1,8 @@ +#import <Foundation/Foundation.h> + +int main() { + @autoreleasepool { + NSLog(@"Hello, World!"); + } + return 0; +} diff --git a/research/strings_empty/tests/swift_code.swift b/research/strings_empty/tests/swift_code.swift new file mode 100644 index 0000000..7df869a --- /dev/null +++ b/research/strings_empty/tests/swift_code.swift @@ -0,0 +1 @@ +print("Hello, World!")