From e3a1dcf76a4787d14b08861b59c4669548df0d74 Mon Sep 17 00:00:00 2001 From: nganhkhoa Date: Fri, 2 Jun 2023 15:24:09 +0700 Subject: [PATCH] update code structure for custom_loader --- research/custom_loader/b.cc | 222 ++++++++++++++++++++++++-------- research/custom_loader/build.sh | 7 +- research/custom_loader/c.cc | 4 +- 3 files changed, 177 insertions(+), 56 deletions(-) diff --git a/research/custom_loader/b.cc b/research/custom_loader/b.cc index 682e877..0127465 100644 --- a/research/custom_loader/b.cc +++ b/research/custom_loader/b.cc @@ -42,6 +42,12 @@ struct libcache_item { void *trie; uint32_t trie_size; uint32_t hash; + + uint64_t slide; + + // pointer to segment address + uint32_t nsegment; + uint64_t* segment; }; struct libcache { @@ -226,6 +232,17 @@ void print_macho_summary(const void *header) { printf(" Segment %s\n", name); printf(" vmaddr=0x%llx fileoffset=0x%llx\n", vmaddr, fileoffset); printf(" vmsize=0x%llx filesize=0x%llx\n", vmsize, filesize); + + uint64_t nsect = *((uint32_t *)ptr + 8*2); + char* sections_ptr = (char*)((uint32_t*)ptr + 18); + for (int sec = 0; sec < nsect; sec++) { + char* secname = sections_ptr; + uint64_t addr = *((uint64_t*)sections_ptr + 4); + uint64_t size = *((uint64_t*)sections_ptr + 5); + uint32_t fileoffset = *((uint32_t*)sections_ptr + 6*2); + printf(" Section %s\n", sections_ptr); + printf(" addr=0x%llx size=0x%llx fileoffset=0x%x\n", addr, size, fileoffset); + } } if (cmd == LC_REEXPORT_DYLIB) { uint32_t name_offset = *((uint32_t *)ptr + 2); @@ -429,6 +446,61 @@ void *custom_dlsym(struct libcache *cache, uint32_t hash, const char *symbol) { return 0; } +void bootstrap_libcache_item(struct libcache_item* item, const void* header, const char* name) { + item->header = (void*)header; + item->trie = get_export_trie(header, item->trie_size); + + const uint32_t magic = *(uint32_t *)header; + char *ptr = (char *)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t *)header + 4); + char* command_ptr = ptr; + + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + char *name = (char *)((uint64_t *)ptr + 1); + if (custom_strcmp(name, "__TEXT") == 0) { + uint64_t vmaddr = *((uint64_t *)ptr + 3); + item->slide = (uint64_t)header - vmaddr; + } + item->nsegment++; + } + ptr += cmdsize; + } + + ptr = command_ptr; + item->segment = (uint64_t*)malloc(sizeof(uint64_t) * item->nsegment); + for (int i = 0, segment_i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t *)ptr + 0); + const uint32_t cmdsize = *((uint32_t *)ptr + 1); + if (cmd == LC_SEGMENT_64) { + uint64_t vmaddr = *((uint64_t *)ptr + 3); + item->segment[segment_i++] = (vmaddr + item->slide); + } + ptr += cmdsize; + } + return; + +} + +struct libcache_item* get_libcache_with_name(struct libcache* cache, const char* name) { + uint32_t hash = calculate_libname_hash(cache, name); + for (int i = 0; i < cache->size; i++) { + struct libcache_item* cache_lib = &cache->libs[i]; + if (cache_lib->hash == hash) { + return cache_lib; + } + } + return 0; +} + void dump_export_trie(const void* trie, uint32_t size, const char* filename) { FILE *outfile = fopen(filename, "wb"); fwrite((char*)trie, size, 1, outfile); @@ -465,21 +537,53 @@ struct ProgramVars { const char **__prognamePtr; }; +void build_cache(struct libcache& cache, void* main); +void fix(struct libcache& cache); + +void test(struct libcache& cache); + __attribute__((constructor)) static void bruh(int argc, const char *const argv[], const char *const envp[], const char *const apple[], const struct ProgramVars *vars) { set_cwd(envp); // ProgramVars contains pointer to main executable (mapped) file - const void *main = (int *)(vars->mh); + + struct libcache cache; + build_cache(cache, (void *)(vars->mh)); + + // dump_export_trie_of( + // "/usr/lib/system/libsystem_c.dylib", &cache, + // "../scripts/libsystem_c_export_trie.bin"); + + // test(cache); + + // ATTENTION: + // If we choose to resolve **this** lib + // Before resolve is complete, **DO NOT** call any library function + // + // The following functions can be used: + // (we do not remove them for **our lib**) + // - malloc + // - free + fix(cache); + + for (int i = 0; i < cache.size; i++) { + free(cache.libs[i].segment); + } + free(cache.libs); +} + +void build_cache(struct libcache& cache, void* main) { const uint64_t main_slide = get_slide(main); // Find our lib (mapped) file const void *thislib = find_header((void *)bruh); // Find dyld lib (mapped) file using a no-sus function const void *libdyld = find_header((void *)dyld_get_sdk_version); - struct libcache cache = {0, 0, (void *)main, (void *)thislib, - (void *)libdyld}; + cache.main = (void*)main; + cache.thislib = (void*)thislib; + cache.libdyld = (void*)libdyld; uint32_t libsystem_hash = calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); @@ -508,10 +612,16 @@ bruh(int argc, const char *const argv[], const char *const envp[], printf("lib header at %p\n", thislib); printf("libdyld header at %p\n", libdyld); + // print_macho_summary(main); + // print_macho_summary(thislib); + uint32_t trie_size; - void *thislib_export_trie = get_export_trie(thislib, trie_size); void *libdyld_export_trie = get_export_trie(libdyld, trie_size); + // we have to traverse the trie to find these symbols + // because if we self-rebuild import table for **this** lib, + // these symbols aren't resolved + // so we have to resolve ourselves and then rebuild the symbols for others typedef int (*dyld_image_count_t)(void); typedef char *(*dyld_get_image_name_t)(int); typedef void *(*dyld_get_image_header_t)(int); @@ -530,50 +640,13 @@ bruh(int argc, const char *const argv[], const char *const envp[], for (int i = 0; i < cache.size; i++) { void *header = dyld_get_image_header_func(i); char *name = dyld_get_image_name_func(i); - cache.libs[i].header = header; - cache.libs[i].trie = get_export_trie(header, trie_size); - cache.libs[i].trie_size = trie_size; + bootstrap_libcache_item(&cache.libs[i], header, name); cache.libs[i].hash = calculate_libname_hash(&cache, name); printf("%p %s\n", header, name); } +} - // dump_export_trie_of("/usr/lib/system/libsystem_c.dylib", &cache, "../scripts/libsystem_c_export_trie.bin"); - - if (false) { // test search using name - void *printf_func = - custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_printf"); - printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); - - void *vm_protect_func = - custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_vm_protect"); - printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, - vm_protect); - - // using relative path - void *func_c_1 = - custom_dlsym(&cache, "./out/libb.dylib", "__Z15exported_from_cv"); - printf("Indirect search: Found=%p Expected=%p\n", func_c_1, - exported_from_c); - - // using rpath - void *func_c_2 = - custom_dlsym(&cache, "@rpath/libb.dylib", "__Z15exported_from_cv"); - printf("Indirect search: Found=%p Expected=%p\n", func_c_2, - exported_from_c); - } - - if (false) { // test search using hash of name - void *printf_func = custom_dlsym(&cache, libsystem_hash, "_printf"); - printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); - - void *vm_protect_func = custom_dlsym(&cache, libsystem_hash, "_vm_protect"); - printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, - vm_protect); - - void *realpath_func = custom_dlsym(&cache, libsystem_hash, "_realpath$DARWIN_EXTSN"); - printf("Indirect search: Found=%p Expected=%p\n", realpath_func, realpath); - } - +void fix(struct libcache& cache) { // now we have function to find exported symbols // it supports full name search or hash search // to reserve space, we use the hash search @@ -617,6 +690,9 @@ bruh(int argc, const char *const argv[], const char *const envp[], // may need to look into why this happens so we can deal with this more // generic + uint32_t libsystem_hash = + calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); + // this is just for now, // in the future we will self fix our lib so our lib also has no import typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int); @@ -630,18 +706,22 @@ bruh(int argc, const char *const argv[], const char *const envp[], uint64_t page_rw_fixed[10]; // should be dynamic, but works for now #include "out/b.h" + // think of a way to get what binary to fix + // so we can iterate through them + struct libcache_item* libfixing = get_libcache_with_name(&cache, "./out/a"); for (int i = 0; i < nimports; i++) { struct imported_symbol symbol = imported_table[i]; + uint64_t fix_at = symbol.offset + libfixing->segment[symbol.segment_i]; int need_rw_fix = true; for (int j = 0; j < npage_rw_fixed; j++) { - if (page_rw_fixed[j] <= symbol.address && - page_rw_fixed[j] + 0x4000 > symbol.address) { + if (page_rw_fixed[j] <= fix_at && + page_rw_fixed[j] + 0x4000 > fix_at) { need_rw_fix = false; } } if (need_rw_fix) { - uint64_t start_page = symbol.address - (symbol.address % 0x4000); + uint64_t start_page = fix_at - (fix_at % 0x4000); vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, VM_PROT_READ | VM_PROT_WRITE); page_rw_fixed[npage_rw_fixed++] = start_page; @@ -655,18 +735,56 @@ bruh(int argc, const char *const argv[], const char *const envp[], // but fuck apple they have relative path and rpath resolved = custom_dlsym(&cache, symbol.lib, symbol.name); } - uint64_t fix_at = symbol.address + main_slide; *(uint64_t *)fix_at = (uint64_t)resolved; printf("imports need to fix: (0x%x)%s at 0x%llx\n", symbol.hash, - symbol.name, symbol.address); - printf(" resolved %p\n", resolved); + symbol.name, fix_at); + printf(" from=%s\n", symbol.lib); + printf(" segment id=%d; offset=0x%llx;", symbol.segment_i, symbol.offset); + printf(" resolved=%p\n", resolved); } for (int j = 0; j < npage_rw_fixed; j++) { uint64_t start_page = page_rw_fixed[j]; vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, VM_PROT_READ); } - - free(cache.libs); +} + +void test(struct libcache& cache) { + uint32_t libsystem_hash = + calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib"); + if (false) { // test search using name + void *printf_func = + custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_printf"); + printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); + + void *vm_protect_func = + custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_vm_protect"); + printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, + vm_protect); + + // using relative path + void *func_c_1 = + custom_dlsym(&cache, "./out/libb.dylib", "__Z15exported_from_cv"); + printf("Indirect search: Found=%p Expected=%p\n", func_c_1, + exported_from_c); + + // using rpath + void *func_c_2 = + custom_dlsym(&cache, "@rpath/libb.dylib", "__Z15exported_from_cv"); + printf("Indirect search: Found=%p Expected=%p\n", func_c_2, + exported_from_c); + } + + if (false) { // test search using hash of name + void *printf_func = custom_dlsym(&cache, libsystem_hash, "_printf"); + printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf); + + void *vm_protect_func = custom_dlsym(&cache, libsystem_hash, "_vm_protect"); + printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func, + vm_protect); + + void *realpath_func = custom_dlsym(&cache, libsystem_hash, "_realpath$DARWIN_EXTSN"); + printf("Indirect search: Found=%p Expected=%p\n", realpath_func, realpath); + } } diff --git a/research/custom_loader/build.sh b/research/custom_loader/build.sh index a8a9bb1..249ff37 100755 --- a/research/custom_loader/build.sh +++ b/research/custom_loader/build.sh @@ -6,7 +6,12 @@ LOGIC=2 mkdir -p $OUT -if [[ $LOGIC -eq 1 ]] +if [[ $LOGIC -eq 0 ]] +then + +clang-format -i -style=llvm *.cc + +elif [[ $LOGIC -eq 1 ]] then # full poc flow diff --git a/research/custom_loader/c.cc b/research/custom_loader/c.cc index dd4deea..fe9c935 100644 --- a/research/custom_loader/c.cc +++ b/research/custom_loader/c.cc @@ -1,4 +1,2 @@ #include -void exported_from_c() { - printf("from c\n"); -} +void exported_from_c() { printf("from c\n"); }