macho/research/custom_loader/b.cc

930 lines
29 KiB
C++

#include <mach-o/dyld.h>
#include <mach/mach.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Working directory string taken from the "PWD=" environment entry by
// set_cwd(). It points into the environment block itself (no copy is made).
char *pwd;
// Length of `pwd` in bytes, not counting the terminating NUL; filled in by
// set_cwd().
uint32_t pwd_len;
// Self-contained replacement for strcmp().
//
// Compares the two NUL-terminated strings byte by byte as unsigned values and
// returns the difference of the first pair of bytes that differ, or 0 when
// both strings end at the same position with identical contents.
int custom_strcmp(const char *p1, const char *p2) {
  const unsigned char *lhs = (const unsigned char *)p1;
  const unsigned char *rhs = (const unsigned char *)p2;
  for (;; lhs++, rhs++) {
    // Stop on the first mismatch, or when the left string ends (in which
    // case the difference is 0 if the right string ends here as well).
    if (*lhs != *rhs || *lhs == '\0') {
      return (int)*lhs - (int)*rhs;
    }
  }
}
// Self-contained replacement for strncmp().
//
// Compares at most `n` bytes of the two strings as unsigned values.
// Returns the difference of the first differing byte pair, or 0 when the
// first `n` bytes match or both strings end (identically) before that.
//
// The `register` storage-class specifier was dropped: it is ill-formed in
// C++17 and has no effect on the generated code or on the function's type.
int custom_strncmp(const char *s1, const char *s2, size_t n)
{
  const unsigned char *lhs = (const unsigned char *)s1;
  const unsigned char *rhs = (const unsigned char *)s2;
  for (size_t i = 0; i < n; i++) {
    const unsigned char u1 = lhs[i];
    const unsigned char u2 = rhs[i];
    if (u1 != u2) {
      return u1 - u2;
    }
    if (u1 == '\0') {
      // Both strings ended here with equal contents.
      return 0;
    }
  }
  return 0;
}
// Locate the "PWD=" entry in the environment and publish it through the
// globals `pwd` (pointer to the value, inside the environment block) and
// `pwd_len` (its length without the terminating NUL).
//
// envp: NULL-terminated array of "KEY=value" strings; may itself be NULL.
//
// When no "PWD=" entry exists, `pwd` is set to NULL and `pwd_len` to 0.
// (The previous version dereferenced NULL + 4 in that case.)
void set_cwd(const char *const *envp) {
  pwd = 0;
  pwd_len = 0;
  if (!envp) {
    return;
  }
  for (; *envp; envp++) {
    const char *entry = *envp;
    // Compare byte by byte instead of a 4-byte integer load: the && chain
    // stops at the entry's NUL, so entries shorter than 4 bytes are never
    // over-read, and no unaligned / type-punned access is needed.
    if (entry[0] == 'P' && entry[1] == 'W' && entry[2] == 'D' &&
        entry[3] == '=') {
      pwd = (char *)(entry + 4);
      while (pwd[pwd_len] != 0) {
        pwd_len++;
      }
      return;
    }
  }
}
// First 32-bit word of a 64-bit Mach-O header (MH_MAGIC_64 in
// <mach-o/loader.h>); used to recognise a header and to pick the header size.
const uint32_t magic64 = 0xfeedfacf;
// First 32-bit word of a 32-bit Mach-O header (MH_MAGIC).
const uint32_t magic32 = 0xfeedface;
// One loaded image, as recorded by bootstrap_libcache_item()/build_cache().
struct libcache_item {
// address of the image's mapped Mach-O header
void *header;
// export trie as returned by get_export_trie() (0 when none was found)
void *trie;
// size in bytes reported for the export trie
uint32_t trie_size;
// calculate_libname_hash() of the image name; assigned in build_cache()
uint32_t hash;
// header address minus the vmaddr of the __TEXT segment
uint64_t slide;
// number of LC_SEGMENT_64 load commands, i.e. number of entries in `segment`
uint32_t nsegment;
// malloc'd array holding vmaddr + slide of every LC_SEGMENT_64, in load
// command order; released by the constructor `bruh`
uint64_t* segment;
};
// Table of all images known to the loader, filled in by build_cache().
struct libcache {
// malloc'd array with one entry per image
struct libcache_item *libs;
// number of entries in `libs`
uint32_t size;
// header of the main executable (taken from ProgramVars::mh)
void *main;
// header of the image containing this code (found via find_header(bruh))
void *thislib;
// header of the image containing dyld_get_sdk_version
void *libdyld;
};
// 32-bit FNV-1a hash of a NUL-terminated string.
//
// Alternative hash functions worth trying are collected at
// https://gist.github.com/sgsfak/9ba382a0049f6ee885f68621ae86079b
// FNV parameters: www.isthe.com/chongo/tech/comp/fnv/#FNV-param
uint32_t fnv_hash(const char *str) {
  const uint32_t prime = 0x01000193;  /* 16777619 */
  uint32_t digest = 0x811c9dc5;       /* offset basis, 2166136261 */
  for (const unsigned char *cursor = (const unsigned char *)str; *cursor != 0;
       cursor++) {
    // xor the octet into the low byte, then multiply by the prime mod 2^32
    digest = (digest ^ *cursor) * prime;
  }
  return digest;
}
// Compute the lookup hash for a library name.
//
// The cache is keyed by the hash of the name reported by
// _dyld_get_image_name, which is a full path, while the path stored in a
// dylib load command may be relative. Relative paths must therefore be
// expanded before hashing.
//
// cache: currently unused; kept for the planned @rpath resolver, which would
//        test candidate paths against the hashes present in the cache.
// name:  library path. Three forms are distinguished by the first character:
//        '.'  relative path, expanded with realpath() before hashing;
//        '@'  @rpath/@loader_path style, not supported yet;
//        else hashed as-is.
//
// Returns the FNV hash of the (expanded) path. Returns 0 for a NULL name and
// for '@' names (previously an uninitialised value was returned for the
// latter). If realpath() fails, the unexpanded name is hashed instead of
// passing NULL to the hash function.
uint32_t calculate_libname_hash(const libcache *cache, const char *name) {
  (void)cache;
  uint32_t (*hash_func)(const char *) = fnv_hash;
  if (!name) {
    return 0;
  }
  if (name[0] == '.') {
    // resolve relative path with ./ ../ ../../ and so on
    char *resolved = realpath(name, 0);
    if (!resolved) {
      return hash_func(name);
    }
    const uint32_t hash = hash_func(resolved);
    free(resolved);
    return hash;
  }
  if (name[0] == '@') {
    // TODO: resolve @rpath. There can be many LC_RPATH commands and an rpath
    // may itself reference @loader_path, so every rpath has to be expanded to
    // a full path; the right one is the one whose hash exists in the cache.
    printf("resolver for @rpath is not supported yet\n");
    return 0;
  }
  return hash_func(name);
}
// An innocuous-looking function whose address is used only as an anchor:
// build_cache() passes it to find_header() to locate the Mach-O header of the
// image that contains it.
// It is unclear whether dyld_stub_binder would be a better anchor: someone
// unfamiliar with dyld would not be suspicious of dyld_stub_binder showing up
// in a modern Mach-O.
// Added in iOS 6, macOS 10.8.
extern "C" uint32_t dyld_get_sdk_version(const mach_header *mh);
// Not defined in the visible part of this file; test() compares its address
// against the lookup of the mangled name "__Z15exported_from_cv".
void exported_from_c();
// Decode one unsigned LEB128 value.
//
// addr: in/out cursor; on return it points just past the last byte of the
//       encoded value.
// ret:  receives the decoded value truncated to 32 bits.
//
// Each byte contributes its low 7 bits, least significant group first; a
// clear high bit marks the final byte. Groups that would land at bit 32 or
// above are consumed but ignored. The shift is performed on uint32_t and is
// bounded, because shifting an int into/over its sign bit or by >= 32 (as the
// previous version did for encodings of five or more bytes) is undefined.
void decode_uleb128(char *&addr, uint32_t *ret) {
  uint32_t result = 0;
  unsigned shift = 0;
  for (;;) {
    const unsigned char byte = (unsigned char)*addr++;
    if (shift < 32) {
      result |= (uint32_t)(byte & 0x7f) << shift;
      shift += 7;
    }
    if (!(byte & 0x80)) {
      break;
    }
  }
  *ret = result;
}
// Find the Mach-O header of the image that contains `_func`.
//
// _func: any address inside the image (typically a function pointer).
// Returns the address of the first 0x1000-aligned location at or below
// `_func` whose first word is a Mach-O magic value.
//
// An earlier approach (not stable) assumed the text was small and probed a
// handful of 0x1000-aligned addresses around a 0x4000 page; it always
// terminated but covered only a small range.
//
// The approach used here relies on the header being 0x1000-aligned and walks
// downwards one 0x1000 step at a time until a magic value is seen. The loop
// has no lower bound: it only terminates if a header really exists below
// `_func` in contiguous readable memory.
//
// The walk starts at the 0x1000 boundary at or below `_func`. The previous
// version rounded *up* to the following boundary, so its first probe read
// memory beyond the function, which may be unmapped or may be the header of
// a different image mapped right after this one.
void *find_header(void *_func) {
  const uint64_t page_size = 0x1000;
  const uint64_t func = (uint64_t)_func;
  uint32_t *candidate = (uint32_t *)(func - (func % page_size));
  while (*candidate != magic64 && *candidate != magic32) {
    candidate -= page_size / sizeof(uint32_t);
  }
  return (void *)candidate;
}
uint64_t get_slide(const void *header) {
const uint32_t magic = *(uint32_t *)header;
char *ptr = (char *)header;
if (magic == magic64) {
ptr += 0x20;
} else {
ptr += 0x20 - 0x4;
}
uint64_t slice = 0;
const uint32_t ncmds = *((uint32_t *)header + 4);
for (int i = 0; i < ncmds; i++) {
const uint32_t cmd = *((uint32_t *)ptr + 0);
const uint32_t cmdsize = *((uint32_t *)ptr + 1);
if (cmd == LC_SEGMENT_64) {
char *name = (char *)((uint64_t *)ptr + 1);
uint64_t vmaddr = *((uint64_t *)ptr + 3);
uint64_t fileoffset = *((uint64_t *)ptr + 5);
if (custom_strcmp(name, "__TEXT") == 0) {
slice = (uint64_t)header - vmaddr;
return slice;
}
}
ptr += cmdsize;
}
return 0;
}
// Debug helper: print the load commands of a mapped Mach-O image.
//
// header: address of the mapped Mach-O header.
//
// For every command the id and size are printed. Additional detail is shown
// for LC_DYLD_EXPORTS_TRIE (offset/size), LC_SEGMENT_64 (addresses, file
// range and every section), LC_SYMTAB (every symbol with its slid address)
// and LC_REEXPORT_DYLIB (library name).
//
// Fixes over the previous version:
//  - the section cursor now advances, so each section is printed once
//    instead of the first section being printed `nsect` times;
//  - section names are printed with a 16-byte limit because a 16-character
//    section name has no NUL terminator;
//  - slide and the __LINKEDIT values start at 0 instead of being
//    indeterminate when LC_SYMTAB is seen before the segments.
void print_macho_summary(const void *header) {
  // In-memory layout of one 64-bit symbol table entry.
  struct symbol_t {
    uint32_t strx;
    uint8_t flags;
    uint8_t sect;
    uint16_t desc;
    uint64_t value;
  };
  // Size in bytes of one 64-bit section record following a segment command.
  const uint32_t section_record_size = 80;

  const uint32_t magic = *(uint32_t *)header;
  char *ptr = (char *)header + (magic == magic64 ? 0x20 : 0x20 - 0x4);
  const uint32_t ncmds = *((uint32_t *)header + 4);
  uint64_t linkedit_vmaddr = 0;
  uint64_t linkedit_fileoffset = 0;
  uint64_t slide = 0;
  printf("parsing macho at %p\n", header);
  printf("ncmds %x\n", ncmds);
  for (uint32_t i = 0; i < ncmds; i++) {
    const uint32_t cmd = *((uint32_t *)ptr + 0);
    const uint32_t cmdsize = *((uint32_t *)ptr + 1);
    printf(" cmd %x %x\n", cmd, cmdsize);
    if (cmd == LC_DYLD_EXPORTS_TRIE) {
      const uint32_t offset = *((uint32_t *)ptr + 2);
      const uint32_t size = *((uint32_t *)ptr + 3);
      printf(" export trie: offset=0x%x size=0x%x\n", offset, size);
    }
    if (cmd == LC_SEGMENT_64) {
      char *name = (char *)((uint64_t *)ptr + 1);
      uint64_t vmaddr = *((uint64_t *)ptr + 3);
      uint64_t vmsize = *((uint64_t *)ptr + 4);
      uint64_t fileoffset = *((uint64_t *)ptr + 5);
      uint64_t filesize = *((uint64_t *)ptr + 6);
      if (custom_strcmp(name, "__TEXT") == 0) {
        slide = (uint64_t)header - vmaddr;
        printf(" --- slide=0x%llx ---\n", slide);
      } else if (custom_strcmp(name, "__LINKEDIT") == 0) {
        linkedit_vmaddr = vmaddr;
        linkedit_fileoffset = fileoffset;
      }
      printf(" Segment %s\n", name);
      printf(" vmaddr=0x%llx fileoffset=0x%llx\n", vmaddr, fileoffset);
      printf(" vmsize=0x%llx filesize=0x%llx\n", vmsize, filesize);
      // nsects is the 17th 32-bit word; section records start at word 18.
      const uint32_t nsect = *((uint32_t *)ptr + 8 * 2);
      char *section = (char *)((uint32_t *)ptr + 18);
      for (uint32_t sec = 0; sec < nsect; sec++) {
        uint64_t addr = *((uint64_t *)section + 4);
        uint64_t size = *((uint64_t *)section + 5);
        uint32_t sect_fileoffset = *((uint32_t *)section + 6 * 2);
        printf(" Section %.16s\n", section);
        printf(" addr=0x%llx size=0x%llx fileoffset=0x%x\n", addr, size,
               sect_fileoffset);
        section += section_record_size;
      }
    }
    if (cmd == LC_SYMTAB) {
      uint32_t symoff = *((uint32_t *)ptr + 2);
      uint32_t nsym = *((uint32_t *)ptr + 3);
      uint32_t stroff = *((uint32_t *)ptr + 4);
      // File offsets inside __LINKEDIT are translated to mapped addresses.
      uint64_t symtab_start =
          (uint64_t)symoff - linkedit_fileoffset + slide + linkedit_vmaddr;
      uint64_t stroff_start =
          (uint64_t)stroff - linkedit_fileoffset + slide + linkedit_vmaddr;
      struct symbol_t *symtab = (struct symbol_t *)symtab_start;
      printf(" symtab: offset=0x%x nsym=0x%x\n", symoff, nsym);
      for (uint32_t j = 0; j < nsym; j++) {
        struct symbol_t symbol = symtab[j];
        char *name = (char *)stroff_start + symbol.strx;
        printf(" %s %llx => %p\n", name, symbol.value,
               (void *)(symbol.value + slide));
      }
    }
    if (cmd == LC_REEXPORT_DYLIB) {
      uint32_t name_offset = *((uint32_t *)ptr + 2);
      char *name = (char *)ptr + name_offset;
      printf(" reexport lib %s\n", name);
    }
    ptr += cmdsize;
  }
}
// Locate the export trie of a mapped image.
//
// header: address of the mapped Mach-O header.
// size:   out; receives the trie size in bytes from the load command, or 0
//         when no trie is found (previously it was left untouched, so callers
//         passing uninitialised storage kept an indeterminate value).
//
// The trie location is taken from the first LC_DYLD_EXPORTS_TRIE command or
// from the export fields of the first LC_DYLD_INFO_ONLY command, whichever
// comes first. Its file offset is translated through the __LINKEDIT segment:
//   address = linkedit vmaddr + slide + (offset - linkedit file offset)
// The __TEXT and __LINKEDIT segment commands are expected to precede the trie
// command in load-command order; values not seen yet count as 0.
//
// Returns the mapped address of the trie, or 0 when neither command exists.
void *get_export_trie(const void *header, uint32_t &size) {
  size = 0;
  const uint32_t magic = *(uint32_t *)header;
  char *ptr = (char *)header + (magic == magic64 ? 0x20 : 0x20 - 0x4);
  uint64_t slide = 0;
  uint64_t linkedit_vmaddr = 0;
  uint64_t linkedit_fileoffset = 0;
  const uint32_t ncmds = *((uint32_t *)header + 4);
  for (uint32_t i = 0; i < ncmds; i++) {
    const uint32_t cmd = *((uint32_t *)ptr + 0);
    const uint32_t cmdsize = *((uint32_t *)ptr + 1);
    if (cmd == LC_DYLD_EXPORTS_TRIE) {
      const uint32_t offset = *((uint32_t *)ptr + 2);
      size = *((uint32_t *)ptr + 3);
      const uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset;
      return (void *)(linkedit_vmaddr + slide + offset_in_linkedit);
    }
    if (cmd == LC_DYLD_INFO_ONLY) {
      // export_off / export_size are the 11th and 12th 32-bit words.
      const uint32_t offset = *((uint32_t *)ptr + 10);
      size = *((uint32_t *)ptr + 11);
      const uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset;
      return (void *)(linkedit_vmaddr + slide + offset_in_linkedit);
    }
    if (cmd == LC_SEGMENT_64) {
      char *name = (char *)((uint64_t *)ptr + 1);
      const uint64_t vmaddr = *((uint64_t *)ptr + 3);
      const uint64_t fileoffset = *((uint64_t *)ptr + 5);
      if (custom_strcmp(name, "__TEXT") == 0) {
        slide = (uint64_t)header - vmaddr;
      } else if (custom_strcmp(name, "__LINKEDIT") == 0) {
        linkedit_vmaddr = vmaddr;
        linkedit_fileoffset = fileoffset;
      }
    }
    ptr += cmdsize;
  }
  return 0;
}
// Test whether an edge label of the export trie is a prefix of the part of
// the symbol that is still unmatched.
//
// buffer: in/out cursor positioned at a NUL-terminated edge label. It is
//         always advanced past the label's terminating NUL, match or not, so
//         the caller can read what follows the label.
// _find:  in/out cursor into the symbol being searched. It is advanced by the
//         label length only when the label matches; otherwise it is untouched.
//
// Returns 1 when the whole label matches the front of `_find`, else 0.
//
// The symbol cursor is advanced only while the match is still alive. The
// previous version kept incrementing and dereferencing it for every label
// byte, reading past the symbol's terminator when the label was longer.
uint32_t should_follow_symbol(char *&buffer, char *&_find) {
  char *cursor = _find;
  bool is_prefix = true;
  for (; *buffer != 0; buffer++) {
    if (!is_prefix) {
      // Already rejected; keep consuming the label only.
      continue;
    }
    if (*cursor == 0 || *cursor != *buffer) {
      // Symbol ended before the label did, or the bytes differ.
      is_prefix = false;
    } else {
      cursor++;
    }
  }
  buffer++;  // step over the label's NUL terminator
  if (is_prefix) {
    _find = cursor;
  }
  return is_prefix;
}
// Look up a symbol in an export trie.
//
// header: mapped Mach-O header of the image; the offset stored in the trie is
//         added to this address.
// trie:   start of the export trie (as returned by get_export_trie()).
// symbol: NUL-terminated symbol name, including the leading underscore.
//
// Returns header + the offset stored at the symbol's terminal node, or 0 when
// the symbol is not in the trie or `trie`/`symbol` is NULL.
//
// Node layout as consumed here: uleb128 size of the terminal payload (0 for
// a non-terminal node), the payload itself, one byte child count, then per
// child a NUL-terminated edge label followed by the uleb128 offset of the
// child node from the start of the trie.
//
// Changes over the previous version: a NULL trie (images without an export
// trie) is rejected instead of dereferenced; the child count is read as an
// unsigned byte so counts above 127 are not treated as negative; the flags
// field is decoded as uleb128 rather than skipped as a single raw byte.
//
// NOTE(review): the flags value is not interpreted, so entries whose payload
// is not a plain image offset (e.g. re-exports) would yield a bogus address —
// confirm whether such entries need handling.
void *find_in_export_trie(const void *header, void *trie, const char *symbol) {
  if (!trie || !symbol) {
    return 0;
  }
  char *ptr = (char *)trie;
  char *find = (char *)symbol;
  for (;;) {
    // A node with payload is a terminal node.
    uint32_t terminal_size = 0;
    decode_uleb128(ptr, &terminal_size);
    if (terminal_size != 0 && *find == 0) {
      // Whole symbol consumed at a terminal node: found.
      break;
    }
    // Not our node: skip its payload (if any) and look at the children.
    ptr += terminal_size;
    const unsigned char num_child = (unsigned char)*ptr++;
    uint32_t followed = 0;
    for (unsigned child = 0; child < num_child; child++) {
      followed = should_follow_symbol(ptr, find);
      uint32_t child_offset = 0;
      decode_uleb128(ptr, &child_offset);
      if (followed) {
        ptr = (char *)trie + child_offset;
        break;
      }
    }
    if (!followed) {
      // symbol not found
      return 0;
    }
  }
  uint32_t flags = 0;
  decode_uleb128(ptr, &flags);
  (void)flags;
  uint32_t func = 0;
  decode_uleb128(ptr, &func);
  return (void *)((char *)header + func);
}
void *find_in_lib(struct libcache *cache, struct libcache_item *lib,
const char *symbol);
void *find_in_reexport(struct libcache *cache, struct libcache_item *lib,
const char *symbol) {
void *header = lib->header;
const uint32_t magic = *(uint32_t *)header;
char *ptr = (char *)header;
if (magic == magic64) {
ptr += 0x20;
} else {
ptr += 0x20 - 0x4;
}
const uint32_t ncmds = *((uint32_t *)header + 4);
for (int i = 0; i < ncmds; i++) {
const uint32_t cmd = *((uint32_t *)ptr + 0);
const uint32_t cmdsize = *((uint32_t *)ptr + 1);
if (cmd != LC_REEXPORT_DYLIB) {
ptr += cmdsize;
continue;
}
uint32_t name_offset = *((uint32_t *)ptr + 2);
char *name = (char *)ptr + name_offset;
uint32_t hash = calculate_libname_hash(cache, name);
for (int j = 0; j < cache->size; j++) {
struct libcache_item reexport = cache->libs[j];
if (reexport.hash != hash) {
continue;
}
void *found = find_in_lib(cache, &reexport, symbol);
if (found)
return found;
}
ptr += cmdsize;
}
return 0;
}
// Resolve `symbol` in `lib`: first in the image's own export trie, and if it
// is not there, in the libraries the image re-exports.
// Returns the symbol address or 0.
void *find_in_lib(struct libcache *cache, struct libcache_item *lib,
                  const char *symbol) {
  void *hit = find_in_export_trie(lib->header, lib->trie, symbol);
  return hit ? hit : find_in_reexport(cache, lib, symbol);
}
void *custom_dlsym(struct libcache *cache, const char *libname,
const char *symbol) {
uint32_t hash = calculate_libname_hash(cache, libname);
for (int i = 0; i < cache->size; i++) {
struct libcache_item cache_lib = cache->libs[i];
if (cache_lib.hash == hash) {
return find_in_lib(cache, &cache_lib, symbol);
}
}
printf("cannot find lib with hash 0x%x\n", hash);
return 0;
}
void *custom_dlsym(struct libcache *cache, uint32_t hash, const char *symbol) {
for (int i = 0; i < cache->size; i++) {
struct libcache_item cache_lib = cache->libs[i];
if (cache_lib.hash == hash) {
return find_in_lib(cache, &cache_lib, symbol);
}
}
return 0;
}
// Fill one cache entry from a mapped image.
//
// item:   entry to initialise; it may be uninitialised memory (build_cache()
//         passes malloc'd storage).
// header: address of the image's mapped Mach-O header.
// name:   image name; unused here, the caller computes `hash` from it.
//
// Sets header, trie, trie_size, slide, nsegment and segment. `hash` is not
// touched. `segment` is a malloc'd array owned by the item (the constructor
// frees it); it is NULL with nsegment == 0 when the image has no
// LC_SEGMENT_64 command or the allocation fails.
//
// The previous version incremented `nsegment` and read `slide` without ever
// initialising them, so with malloc'd storage the segment count — and the
// size of the allocation derived from it — was indeterminate.
void bootstrap_libcache_item(struct libcache_item* item, const void* header, const char* name) {
  (void)name;
  item->header = (void *)header;
  item->trie_size = 0;
  item->trie = get_export_trie(header, item->trie_size);
  item->slide = 0;
  item->nsegment = 0;
  item->segment = 0;

  const uint32_t magic = *(uint32_t *)header;
  char *const first_cmd = (char *)header + (magic == magic64 ? 0x20 : 0x20 - 0x4);
  const uint32_t ncmds = *((uint32_t *)header + 4);

  // Pass 1: count the segments and derive the slide from __TEXT.
  char *ptr = first_cmd;
  for (uint32_t i = 0; i < ncmds; i++) {
    const uint32_t cmd = *((uint32_t *)ptr + 0);
    const uint32_t cmdsize = *((uint32_t *)ptr + 1);
    if (cmd == LC_SEGMENT_64) {
      char *segname = (char *)((uint64_t *)ptr + 1);
      if (custom_strcmp(segname, "__TEXT") == 0) {
        const uint64_t vmaddr = *((uint64_t *)ptr + 3);
        item->slide = (uint64_t)header - vmaddr;
      }
      item->nsegment++;
    }
    ptr += cmdsize;
  }
  if (item->nsegment == 0) {
    return;
  }

  item->segment = (uint64_t *)malloc(sizeof(uint64_t) * item->nsegment);
  if (!item->segment) {
    item->nsegment = 0;
    return;
  }

  // Pass 2: record the slid start address of every segment, in order.
  ptr = first_cmd;
  uint32_t segment_i = 0;
  for (uint32_t i = 0; i < ncmds; i++) {
    const uint32_t cmd = *((uint32_t *)ptr + 0);
    const uint32_t cmdsize = *((uint32_t *)ptr + 1);
    if (cmd == LC_SEGMENT_64) {
      const uint64_t vmaddr = *((uint64_t *)ptr + 3);
      item->segment[segment_i++] = vmaddr + item->slide;
    }
    ptr += cmdsize;
  }
}
// Find a cache entry by library name.
//
// Besides real library paths (matched through their hash), two special names
// are understood: "main" selects the entry whose header is cache->main and
// "thislib" the one whose header is cache->thislib.
// Returns a pointer into cache->libs, or 0 when nothing matches.
struct libcache_item* get_libcache_with_name(struct libcache* cache, const char* name) {
  void *special = 0;
  if (custom_strcmp(name, "main") == 0) {
    special = cache->main;
  } else if (custom_strcmp(name, "thislib") == 0) {
    special = cache->thislib;
  }
  const uint32_t wanted = calculate_libname_hash(cache, name);
  for (uint32_t idx = 0; idx < cache->size; idx++) {
    struct libcache_item *entry = cache->libs + idx;
    // match either the special-case header pointer or the name hash
    if (entry->header == special || entry->hash == wanted) {
      return entry;
    }
  }
  return 0;
}
// Write `size` bytes starting at `trie` to the file `filename` (binary mode,
// truncating any existing file).
//
// Nothing is written when the file cannot be opened; the previous version
// passed the NULL stream straight to fwrite(). A NULL trie or a zero size
// produces an empty file.
void dump_export_trie(const void* trie, uint32_t size, const char* filename) {
  FILE *outfile = fopen(filename, "wb");
  if (!outfile) {
    printf("cannot open %s for writing\n", filename);
    return;
  }
  if (trie && size != 0) {
    fwrite((const char *)trie, size, 1, outfile);
  }
  fclose(outfile);
}
void dump_export_trie_of(const char* libname, const libcache* cache, const char* filename) {
uint32_t hash = calculate_libname_hash(cache, libname);
for (int i = 0; i < cache->size; i++) {
struct libcache_item cache_lib = cache->libs[i];
if (cache_lib.hash == hash) {
return dump_export_trie(cache_lib.trie, cache_lib.trie_size, filename);
}
}
}
// Look up a symbol by exact name in the LC_SYMTAB symbol table of an image.
//
// lib:  cache entry of the image; may be NULL.
// find: NUL-terminated symbol name as stored in the string table.
//
// Returns the symbol's value plus the image slide, or 0 when `lib`, its
// header or `find` is NULL, when the image has no usable symbol table, or
// when the name is not present.
//
// The symbol and string tables are addressed through the __LINKEDIT segment:
//   address = file offset - linkedit file offset + slide + linkedit vmaddr
//
// Changes over the previous version: NULL arguments are rejected (the
// name-based overload can pass a NULL `lib`); slide and the __LINKEDIT values
// are no longer read uninitialised when LC_SYMTAB precedes the segment
// commands; string-table indices beyond the table size are skipped.
void* find_in_symtab(const libcache_item* lib, const char* find) {
  // In-memory layout of one 64-bit symbol table entry.
  struct symbol_t {
    uint32_t strx;
    uint8_t flags;
    uint8_t sect;
    uint16_t desc;
    uint64_t value;
  };
  if (!lib || !lib->header || !find) {
    return 0;
  }
  void *header = lib->header;
  const uint32_t magic = *(uint32_t *)header;
  char *ptr = (char *)header + (magic == magic64 ? 0x20 : 0x20 - 0x4);
  const uint32_t ncmds = *((uint32_t *)header + 4);
  uint64_t linkedit_vmaddr = 0;
  uint64_t linkedit_fileoffset = 0;
  uint64_t slide = 0;
  bool linkedit_seen = false;
  for (uint32_t i = 0; i < ncmds; i++) {
    const uint32_t cmd = *((uint32_t *)ptr + 0);
    const uint32_t cmdsize = *((uint32_t *)ptr + 1);
    if (cmd == LC_SYMTAB) {
      if (!linkedit_seen) {
        // Without __LINKEDIT the table offsets cannot be translated.
        return 0;
      }
      const uint32_t symoff = *((uint32_t *)ptr + 2);
      const uint32_t nsym = *((uint32_t *)ptr + 3);
      const uint32_t stroff = *((uint32_t *)ptr + 4);
      const uint32_t strsize = *((uint32_t *)ptr + 5);
      const uint64_t symtab_start =
          (uint64_t)symoff - linkedit_fileoffset + slide + linkedit_vmaddr;
      const uint64_t strtab_start =
          (uint64_t)stroff - linkedit_fileoffset + slide + linkedit_vmaddr;
      const struct symbol_t *symtab = (const struct symbol_t *)symtab_start;
      const char *strtab = (const char *)strtab_start;
      for (uint32_t j = 0; j < nsym; j++) {
        if (symtab[j].strx >= strsize) {
          continue;
        }
        if (custom_strcmp(strtab + symtab[j].strx, find) == 0) {
          return (void *)(symtab[j].value + slide);
        }
      }
      // Only the first LC_SYMTAB is consulted.
      return 0;
    }
    if (cmd == LC_SEGMENT_64) {
      char *name = (char *)((uint64_t *)ptr + 1);
      const uint64_t vmaddr = *((uint64_t *)ptr + 3);
      const uint64_t fileoffset = *((uint64_t *)ptr + 5);
      if (custom_strcmp(name, "__TEXT") == 0) {
        slide = (uint64_t)header - vmaddr;
      } else if (custom_strcmp(name, "__LINKEDIT") == 0) {
        linkedit_vmaddr = vmaddr;
        linkedit_fileoffset = fileoffset;
        linkedit_seen = true;
      }
    }
    ptr += cmdsize;
  }
  return 0;
}
// Look up a symbol by exact name in the symbol table of the cached library
// called `libname`.
//
// Returns the symbol address, or 0 when the symbol is missing or no cached
// image has the hash of `libname`. (The previous version forwarded a NULL
// entry to the per-library overload in the latter case.)
void* find_in_symtab(const char* libname, const libcache* cache, const char* find) {
  const uint32_t hash = calculate_libname_hash(cache, libname);
  for (uint32_t i = 0; i < cache->size; i++) {
    if (cache->libs[i].hash == hash) {
      return find_in_symtab(&(cache->libs[i]), find);
    }
  }
  return 0;
}
// printf() stand-in used to demonstrate hooking: prints a banner line before
// and after the formatted output.
//
// format, ...: same contract as printf().
// Returns the result of formatting `format` (number of characters written, or
// a negative value on error); the banners are not counted.
//
// The arguments are forwarded with vprintf(). The previous version passed the
// va_list object itself to printf(), which prints garbage for any format
// that consumes arguments.
int hook_printf(const char *format, ...) {
  va_list args;
  va_start(args, format);
  printf("HOOKED BEGIN LOL\n");
  int status = vprintf(format, args);
  printf("HOOKED END LOL\n");
  va_end(args);
  return status;
}
// Layout of the fifth argument received by the constructor `bruh`.
// Only `mh` is read in this file (as the main executable's header); the other
// members are declared to keep the layout.
// NOTE(review): presumably mirrors dyld's ProgramVars structure — confirm
// against the dyld sources for the targeted OS versions.
struct ProgramVars {
void *mh; // mach_header or mach_header64
int *NXArgcPtr;
const char ***NXArgvPtr;
const char ***environPtr;
const char **__prognamePtr;
};
// Forward declarations for the stages driven by the constructor `bruh`.
// build_cache: fill `cache` with every loaded image; `main` is the header of
// the main executable.
void build_cache(struct libcache& cache, void* main);
// fix: resolve the imports listed in out/b.h and write the addresses in place.
void fix(struct libcache& cache);
// test: manual lookup checks (currently disabled inside the function).
void test(struct libcache& cache);
// Constructor of this library: performs the manual symbol binding.
//
// Steps: remember the working directory from `envp`, build the cache of
// loaded images (vars->mh is the mapped main executable), patch the imports
// with fix(), then release the cache.
//
// Debugging aids that can be called between build_cache() and fix():
//   dump_export_trie_of("/usr/lib/libobjc.A.dylib", &cache,
//                       "../scripts/lib_objc_export_trie.bin");
//   print_macho_summary(
//       get_libcache_with_name(&cache, "/usr/lib/libobjc.A.dylib")->header);
//   test(cache);
//
// ATTENTION: if **this** library is the one being resolved, do NOT call any
// library function before the resolve is complete. Only malloc and free may
// be used, because they are not removed from this library's imports.
__attribute__((constructor)) static void
bruh(int argc, const char *const argv[], const char *const envp[],
     const char *const apple[], const struct ProgramVars *vars) {
  printf("=== manual symbol bind starts ===\n");
  set_cwd(envp);

  struct libcache cache;
  build_cache(cache, (void *)(vars->mh));
  fix(cache);

  // Each entry owns its segment table; the entry array itself goes last.
  uint32_t released = 0;
  while (released < cache.size) {
    free(cache.libs[released].segment);
    released++;
  }
  free(cache.libs);
  printf("=== manual symbol bind completes ===\n");
}
// Build the table of all loaded images.
//
// cache: out; main/thislib/libdyld are always set. On success `libs` is a
//        malloc'd array of `size` bootstrapped entries with their name hash.
//        On any failure `libs` is NULL and `size` is 0, so callers that
//        iterate or free the cache stay well-defined.
// main:  mapped header of the main executable.
//
// How it works: the header of this library is found from the address of the
// constructor, and the header of libdyld from the address of
// dyld_get_sdk_version. libdyld's export trie is then walked by hand to
// obtain _dyld_image_count, _dyld_get_image_header and _dyld_get_image_name,
// so none of them appears among this library's imports. If this library's
// own import table is rebuilt, those symbols would not be bound by dyld
// anyway and have to be resolved manually first. With them every image is
// enumerated; the export tables of these images are what later replaces the
// __got / __la_symbol_ptr contents. The goal is that this library imports
// only dyld_get_sdk_version and the main executable imports nothing.
//
// Changes over the previous version: the three looked-up function pointers
// and the allocation are checked before use, and two unused computations
// (main slide, libSystem hash) were removed.
void build_cache(struct libcache& cache, void* main) {
  // Find our lib (mapped) file
  const void *thislib = find_header((void *)bruh);
  // Find dyld lib (mapped) file using an innocuous function
  const void *libdyld = find_header((void *)dyld_get_sdk_version);
  cache.main = (void *)main;
  cache.thislib = (void *)thislib;
  cache.libdyld = (void *)libdyld;
  cache.libs = 0;
  cache.size = 0;

  printf("executable header at %p\n", main);
  printf("lib header at %p\n", thislib);
  printf("libdyld header at %p\n", libdyld);

  uint32_t trie_size = 0;
  void *libdyld_export_trie = get_export_trie(libdyld, trie_size);
  if (!libdyld_export_trie) {
    printf("cannot find libdyld export trie\n");
    return;
  }

  typedef int (*dyld_image_count_t)(void);
  typedef char *(*dyld_get_image_name_t)(int);
  typedef void *(*dyld_get_image_header_t)(int);
  dyld_image_count_t dyld_image_count_func =
      (dyld_image_count_t)find_in_export_trie(libdyld, libdyld_export_trie,
                                              "__dyld_image_count");
  dyld_get_image_header_t dyld_get_image_header_func =
      (dyld_get_image_header_t)find_in_export_trie(libdyld, libdyld_export_trie,
                                                   "__dyld_get_image_header");
  dyld_get_image_name_t dyld_get_image_name_func =
      (dyld_get_image_name_t)find_in_export_trie(libdyld, libdyld_export_trie,
                                                 "__dyld_get_image_name");
  if (!dyld_image_count_func || !dyld_get_image_header_func ||
      !dyld_get_image_name_func) {
    printf("cannot resolve dyld image enumeration functions\n");
    return;
  }

  const int image_count = dyld_image_count_func();
  if (image_count <= 0) {
    return;
  }
  struct libcache_item *libs = (struct libcache_item *)malloc(
      sizeof(struct libcache_item) * (uint32_t)image_count);
  if (!libs) {
    printf("cannot allocate library cache\n");
    return;
  }
  cache.libs = libs;
  cache.size = (uint32_t)image_count;

  for (uint32_t i = 0; i < cache.size; i++) {
    void *header = dyld_get_image_header_func((int)i);
    char *name = dyld_get_image_name_func((int)i);
    bootstrap_libcache_item(&cache.libs[i], header, name);
    cache.libs[i].hash = name ? calculate_libname_hash(&cache, name) : 0;
  }
}
// Resolve every import listed in out/b.h and write the resolved address into
// the image being fixed.
//
// cache: image table produced by build_cache().
//
// Background: exported symbols can be found by full library name or by name
// hash; the hash is preferred to save space. The import list has the shape
// [(offset, name, libhash)]. With legacy binding the targets are __got
// (which always has dyld_stub_binder) and __la_symbol_ptr; with modern
// binding __got carries the full rebase/bind opcodes. To also fix
// non-system frameworks/libraries used by the main executable, one such list
// per library would be needed (main: [...], libA: [...], libB: [...]).
//
// The flat list is temporary for the PoC. Since many symbols come from the
// same library, a trie keyed by the 4-byte libhash followed by the symbol
// name, with the offset at the terminal node, would shrink the data; building
// such a trie is harder than traversing one.
//
// NOTE: mach_task_self() appears in the symbol table as _mach_task_self_ but
// has to be searched as _mach_task_self. The reason is not understood yet;
// future replacements of mach_task_self must keep using _mach_task_self.
//
// Changes over the previous version: the function returns early when
// vm_protect / mach_task_self or the library to fix cannot be found instead
// of calling or dereferencing NULL; the list of already-unprotected pages no
// longer overflows its 10 slots (extra pages are still made writable, they
// are just not remembered); the page scan stops at the first hit.
//
// NOTE(review): the second closing brace at the end of this block is kept
// from the original; presumably it pairs with a scope opened inside
// out/b.h — confirm against the generated header.
void fix(struct libcache& cache) {
  uint32_t libsystem_hash =
      calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib");
  // this is just for now,
  // in the future this library fixes itself so it has no imports either
  typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int);
  typedef void *(*mach_task_self_t)();
  mach_task_self_t mach_task_self_func =
      (mach_task_self_t)custom_dlsym(&cache, libsystem_hash, "_mach_task_self");
  vm_protect_t vm_protect_func =
      (vm_protect_t)custom_dlsym(&cache, libsystem_hash, "_vm_protect");
  if (!mach_task_self_func || !vm_protect_func) {
    printf("cannot resolve _mach_task_self/_vm_protect, nothing fixed\n");
    return;
  }
  const int max_page_rw_fixed = 10; // should be dynamic, but works for now
  int npage_rw_fixed = 0;
  uint64_t page_rw_fixed[10];
#include "out/b.h"
  // think of a way to get what binary to fix
  // so we can iterate through them
  if (nimports == 0) {
    printf("there is no imports to fix\n");
  }
  struct libcache_item* libfixing = get_libcache_with_name(&cache, lib_to_resolve);
  if (!libfixing) {
    printf("cannot find the library to fix, nothing fixed\n");
    return;
  }
  // print_macho_summary(libfixing->header);
  for (int i = 0; i < nimports; i++) {
    struct imported_symbol symbol = imported_table[i];
    uint64_t fix_at = symbol.offset + libfixing->segment[symbol.segment_i];

    // Make the 0x1000 block containing the slot writable, once per block.
    int need_rw_fix = true;
    for (int j = 0; j < npage_rw_fixed; j++) {
      if (page_rw_fixed[j] <= fix_at && page_rw_fixed[j] + 0x1000 > fix_at) {
        need_rw_fix = false;
        break;
      }
    }
    if (need_rw_fix) {
      uint64_t start_page = fix_at - (fix_at % 0x1000);
      vm_protect_func(mach_task_self_func(), start_page, 0x1000, 0,
                      VM_PROT_READ | VM_PROT_WRITE);
      if (npage_rw_fixed < max_page_rw_fixed) {
        page_rw_fixed[npage_rw_fixed++] = start_page;
      }
      printf("modify page starts at 0x%llx to RW\n", start_page);
    }

    void *resolved;
    // search with hash is faster
    resolved = custom_dlsym(&cache, symbol.hash, symbol.name);
    if (resolved == 0) {
      // fall back to the name: load commands may use relative paths or rpath,
      // whose hash differs from the hash of the full image path
      resolved = custom_dlsym(&cache, symbol.lib, symbol.name);
    }
    *(uint64_t *)fix_at = (uint64_t)resolved;
    printf("imports need to fix: (0x%x)%s at 0x%llx\n", symbol.hash,
           symbol.name, fix_at);
    printf(" from=%s\n", symbol.lib);
    printf(" segment id=%d; offset=0x%llx;", symbol.segment_i, symbol.offset);
    printf(" resolved=%llx(%p)\n", *(uint64_t*)fix_at, resolved);
  }
  // TODO: restore the original protection of the touched regions, or leave
  // them as they are:
  // for (int j = 0; j < npage_rw_fixed; j++) {
  //   uint64_t start_page = page_rw_fixed[j];
  //   vm_protect_func(mach_task_self_func(), start_page, 0x4000, 0, VM_PROT_READ);
  // }
}
}
void test(struct libcache& cache) {
uint32_t libsystem_hash =
calculate_libname_hash(&cache, "/usr/lib/libSystem.B.dylib");
if (false) { // test search using name
void *printf_func =
custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_printf");
printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf);
void *vm_protect_func =
custom_dlsym(&cache, "/usr/lib/libSystem.B.dylib", "_vm_protect");
printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func,
vm_protect);
// using relative path
void *func_c_1 =
custom_dlsym(&cache, "./out/libb.dylib", "__Z15exported_from_cv");
printf("Indirect search: Found=%p Expected=%p\n", func_c_1,
exported_from_c);
// using rpath
void *func_c_2 =
custom_dlsym(&cache, "@rpath/libb.dylib", "__Z15exported_from_cv");
printf("Indirect search: Found=%p Expected=%p\n", func_c_2,
exported_from_c);
}
if (false) { // test search using hash of name
void *printf_func = custom_dlsym(&cache, libsystem_hash, "_printf");
printf("Indirect search: Found=%p Expected=%p\n", printf_func, printf);
void *vm_protect_func = custom_dlsym(&cache, libsystem_hash, "_vm_protect");
printf("Indirect search: Found=%p Expected=%p\n", vm_protect_func,
vm_protect);
void *realpath_func = custom_dlsym(&cache, libsystem_hash, "_realpath$DARWIN_EXTSN");
printf("Indirect search: Found=%p Expected=%p\n", realpath_func, realpath);
}
}