// Decode one unsigned LEB128 value starting at `addr`.
//
// addr: in/out cursor; on return it points just past the last byte of the
//       encoded value (the first byte whose high bit is clear).
// ret:  receives the decoded value, truncated to 32 bits.
//
// Each byte carries 7 payload bits, least-significant group first; the high
// bit (0x80) marks "more bytes follow".
void decode_uleb128(char*& addr, uint32_t* ret) {
  uint32_t result = 0;
  unsigned shift = 0;

  for (;;) {
    const unsigned char byte = *(const unsigned char*)addr;
    addr++;

    // Shift in unsigned 32-bit arithmetic, and only while the group still
    // lands inside the result: shifting an int into/over the sign bit or by
    // >= 32 is undefined behaviour. Bits beyond 32 are dropped, but the
    // cursor still consumes the whole encoding.
    if (shift < 32) {
      result |= (uint32_t)(byte & 0x7f) << shift;
      shift += 7;
    }

    if (!(byte & 0x80)) break;
  }

  *ret = result;
}
+ // return head; + + // Approach 2: (more stable) + // We know that the header is 0x1000 aligned, + // just loop until the magic value is found + // Using while loop so ¯\_(ツ)_/¯ + const uint64_t page_size = 0x1000; + uint64_t func = (uint64_t)_func; + uint64_t potential_head = func + (0x1000 - (func % page_size)); + + void* head = 0; + uint32_t* x = (uint32_t*)(potential_head); + while (*x != magic64 && *x != magic32) { + x -= 0x1000/4; + } + return (void*)x; +} + +void print_macho_summary(const void* header) { + const uint32_t magic = *(uint32_t*)header; + char* ptr = (char*)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + const uint32_t ncmds = *((uint32_t*)header + 4); + printf("parsing macho at %p\n", header); + printf("ncmds %x\n", ncmds); + for (int i = 0; i < ncmds; i++) { + const uint32_t cmd = *((uint32_t*)ptr + 0); + const uint32_t cmdsize = *((uint32_t*)ptr + 1); + printf(" cmd %x %x\n", cmd, cmdsize); + if (cmd == LC_DYLD_EXPORTS_TRIE) { + const uint32_t offset = *((uint32_t*)ptr + 2); + const uint32_t size = *((uint32_t*)ptr + 3); + printf(" export trie: offset=0x%x size=0x%x\n", offset, size); + } + if (cmd == LC_SEGMENT_64) { + char* name = (char*)((uint64_t*)ptr + 1); + uint64_t vmaddr = *((uint64_t*)ptr + 3); + uint64_t vmsize = *((uint64_t*)ptr + 4); + uint64_t fileoffset = *((uint64_t*)ptr + 5); + uint64_t filesize = *((uint64_t*)ptr + 6); + if (strcmp(name, "__TEXT") == 0) { + uint64_t slide = (uint64_t)header - vmaddr; + printf(" --- slide=0x%llx ---\n", slide); + } + printf(" Segment %s\n", name); + printf(" vmaddr=0x%llx fileoffset=0x%llx\n", vmaddr, fileoffset); + printf(" vmsize=0x%llx filesize=0x%llx\n", vmsize, filesize); + } + ptr += cmdsize; + } +} + +void* get_export_trie(const void* header, uint32_t& size) { + const uint32_t magic = *(uint32_t*)header; + char* ptr = (char*)header; + if (magic == magic64) { + ptr += 0x20; + } else { + ptr += 0x20 - 0x4; + } + + uint64_t slice = 0; + uint64_t 
// Compare one export-trie edge label against the not-yet-matched part of the
// symbol being looked up.
//
// buffer: in/out cursor over the trie; on entry it points at the
//         NUL-terminated edge label, on return it always points just past
//         that terminator (the caller decodes the child offset from there),
//         whether or not the label matched.
// _find:  in/out cursor over the symbol; advanced past the label only when
//         the label is a prefix of the remaining symbol, otherwise untouched.
//
// Returns 1 when the edge label is a prefix of `_find` (an empty label
// counts as a prefix), 0 otherwise.
uint32_t should_follow_symbol(char*& buffer, char*& _find) {
  char* find = _find;
  bool is_prefix = true;

  for (;;) {
    const char edge_char = *buffer;
    // The label must always be consumed up to and including its terminator.
    buffer++;
    if (edge_char == 0) break;

    // Stop touching `find` as soon as the match has failed. This covers both
    // a plain mismatch and the symbol ending before the label does (its NUL
    // never equals a non-NUL label character), so the symbol is never read
    // past its own terminator.
    if (is_prefix && *find == edge_char) {
      find++;
    } else {
      is_prefix = false;
    }
  }

  // Only commit the advanced cursor when the whole label matched.
  if (is_prefix) {
    _find = find;
  }
  return is_prefix;
}
// Replacement for printf that brackets the real output with marker lines.
//
// format, ...: exactly as for printf.
// Returns what the forwarded formatting call returns: the number of
// characters written for `format`, or a negative value on error. The marker
// lines are not counted.
int hook_printf(const char* format, ...) {
  va_list args;
  va_start(args, format);

  printf("HOOKED BEGIN LOL\n");
  // A va_list must be forwarded with the v* variant; handing it to printf
  // makes printf treat the list object itself as the first variadic argument.
  int status = vprintf(format, args);
  printf("HOOKED END LOL\n");

  va_end(args);
  return status;
}
make our lib use only dyld_get_sdk_version + // For the main executable, imports are empty due to manual resolve + + printf("executable header at %p\n", main); + printf("lib header at %p\n", thislib); + printf("libdyld header at %p\n", libdyld); + + for (int i = 0; i < _dyld_image_count(); i++) { + void* header = (void*)_dyld_get_image_header(i); + char* name = (char*)_dyld_get_image_name(i); + int offset = _dyld_get_image_vmaddr_slide(i); + printf("%p 0x%x name=%s\n", header, offset, name); + } + + uint32_t trie_size; + void* thislib_export_trie = get_export_trie(thislib, trie_size); + void* libdyld_export_trie = get_export_trie(libdyld, trie_size); + void* libc_export_trie = get_export_trie(libc, trie_size); + + // printf("export this lib address %p\n", thislib_export_trie); + // for (int i = 0; i < 136; i++) { + // if (i % 0x10 == 0) printf("\n"); + // printf("%02x ", *((unsigned char*)thislib_export_trie + i)); + // } + // printf("\n"); + + + // printf("export dyld lib address %llx\n", (uint64_t)libdyld_export_trie); + // for (int i = 0; i < 0x11e0; i++) { + // if (i % 0x10 == 0) printf("\n"); + // printf("%02x ", *((unsigned char*)libdyld_export_trie + i)); + // } + // printf("\n"); + + // printf("export system lib address %llx\n", (uint64_t)system_export_trie); + // for (int i = 0; i < 0x10f30; i++) { + // if (i % 0x10 == 0) printf("\n"); + // printf("%02x ", *((unsigned char*)system_export_trie + i)); + // } + + // printf("\n"); + // FILE *write_ptr = fopen("../tmp/libc_export_trie.bin","wb"); + // fwrite(system_export_trie, trie_size, 1, write_ptr); + + struct test_find_export { + const char* name; + const void* lib; + void* trie; + void* original; + }; + + struct test_find_export find_export_testcases[] = { + {"__Z11find_headerPv", thislib, thislib_export_trie, (void*)find_header}, + {"__dyld_get_image_name", libdyld, libdyld_export_trie, (void*)_dyld_get_image_name}, + {"__dyld_image_count", libdyld, libdyld_export_trie, (void*)_dyld_image_count}, + 
#!/bin/sh
# Build the hooking dylib (libb.dylib) and the test executable that links
# against it. Run from the directory that contains a.c and b.c.
set -ex

# Minimum macOS version passed to -mmacosx-version-min.
VERSION=14
# Output directory for the build products.
OUT=./out

mkdir -p "$OUT"
# b.c uses C++ features (references, extern "C"), hence clang++.
clang++ -mmacosx-version-min="$VERSION" -o "$OUT/libb.dylib" -shared b.c
# Search for libb.dylib in the same directory it was just written to.
clang++ -mmacosx-version-min="$VERSION" -o "$OUT/a" a.c -L"$OUT" -lb
: 1; // == 1 +}; + +int main() { + // 0 = 0x8010000000000000 = 9227875636482146000 + uint64_t x = 0x8000000000000010; + uint8_t y[] = {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}; + struct dyld_chained_ptr_64_bind* bind = y; + // printf("x? %llx\n", x); + // printf("y? %llx\n", *(uint64_t*)y); + printf("bind? %d\n", bind->bind); + printf("next? %d\n", bind->next); + printf("ordinal? %d\n", bind->ordinal); + printf("addend? %d\n", bind->addend); +} diff --git a/research/scripts/dyld_export_trie.bin b/research/scripts/dyld_export_trie.bin new file mode 100644 index 0000000..ab007ff Binary files /dev/null and b/research/scripts/dyld_export_trie.bin differ diff --git a/research/scripts/go.mod b/research/scripts/go.mod new file mode 100644 index 0000000..34429e3 --- /dev/null +++ b/research/scripts/go.mod @@ -0,0 +1,7 @@ +module a + +go 1.20 + +require github.com/blacktop/go-macho v1.1.155 + +require github.com/blacktop/go-dwarf v1.0.9 // indirect diff --git a/research/scripts/go.sum b/research/scripts/go.sum new file mode 100644 index 0000000..7d215ad --- /dev/null +++ b/research/scripts/go.sum @@ -0,0 +1,4 @@ +github.com/blacktop/go-dwarf v1.0.9 h1:eT/L7gt0gllvvgnRXY0MFKjNB6+jtOY5DTm2ynVX2dY= +github.com/blacktop/go-dwarf v1.0.9/go.mod h1:4W2FKgSFYcZLDwnR7k+apv5i3nrau4NGl9N6VQ9DSTo= +github.com/blacktop/go-macho v1.1.155 h1:1yIFj2PxtenaPiB3eGwUSxmZki55f5b4JHGtfrJVFHs= +github.com/blacktop/go-macho v1.1.155/go.mod h1:f2X4noFBob4G5bWUrzvPBKDVcFWZgDCM7rIn7ygTID0= diff --git a/research/scripts/libc_export_trie.bin b/research/scripts/libc_export_trie.bin new file mode 100644 index 0000000..4019a0b Binary files /dev/null and b/research/scripts/libc_export_trie.bin differ diff --git a/research/scripts/parse_export_trie.go b/research/scripts/parse_export_trie.go new file mode 100644 index 0000000..d1dbf98 --- /dev/null +++ b/research/scripts/parse_export_trie.go @@ -0,0 +1,16 @@ +package main + +import "github.com/blacktop/go-macho/pkg/trie" +import "os" +import "bytes" 
+import "fmt" + +func main() { + // buffer, _ := os.ReadFile("./dyld_export_trie.bin") + buffer, _ := os.ReadFile("./libc_export_trie.bin") + r := bytes.NewReader(buffer) + nodes, _ := trie.ParseTrieExports(r, 0x7ff80715c000) + for _, node := range nodes { + fmt.Println(node) + } +} diff --git a/research/scripts/system_exoprt_trie.bin b/research/scripts/system_exoprt_trie.bin new file mode 100644 index 0000000..80b110f Binary files /dev/null and b/research/scripts/system_exoprt_trie.bin differ diff --git a/research/scripts/system_export_trie.bin b/research/scripts/system_export_trie.bin new file mode 100644 index 0000000..80b110f Binary files /dev/null and b/research/scripts/system_export_trie.bin differ