update custom loader scripts

This commit is contained in:
nganhkhoa 2023-06-01 17:29:45 +07:00
parent b5ee7124ab
commit 4fd6bd166e
5 changed files with 682 additions and 3 deletions

View File

@ -0,0 +1,8 @@
#include <stdio.h>
void exported_from_c();
// Entry point of the test executable: prints a greeting and then calls
// exported_from_c(), which is expected to be provided by a linked library.
int main() {
  printf("Hello World\n");
  exported_from_c();
  return 0;
}

630
research/custom_loader/b.cc Normal file
View File

@ -0,0 +1,630 @@
#include <mach-o/dyld.h>
#include <mach/mach.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Points at the value part of the "PWD=" environment entry; assigned by
// set_cwd().
char *pwd;
// Number of bytes in pwd, not counting the terminating NUL; computed by
// set_cwd().
uint32_t pwd_len;
// Compares two NUL-terminated strings byte by byte (bytes are treated as
// unsigned). Returns 0 when they are equal, otherwise the difference between
// the first pair of bytes that differ (negative when p1 sorts first).
int custom_strcmp(const char *p1, const char *p2) {
  const unsigned char *lhs = (const unsigned char *)p1;
  const unsigned char *rhs = (const unsigned char *)p2;
  for (;; lhs++, rhs++) {
    const unsigned char a = *lhs;
    const unsigned char b = *rhs;
    // Stop on the first mismatch, or when both strings end together.
    if (a != b || a == '\0') {
      return a - b;
    }
  }
}
// Locates the "PWD=" entry in the environment block and records its value in
// the globals pwd / pwd_len, then prints it.
//
// envp: NULL-terminated array of "NAME=value" strings (may itself be NULL).
//
// When no PWD entry exists, pwd is left pointing at an empty string and
// pwd_len is 0; nothing is printed in that case.
void set_cwd(const char *const *envp) {
  static char no_pwd[] = "";
  pwd = no_pwd;
  pwd_len = 0;
  if (envp == 0) {
    return;
  }

  const char *value = 0;
  for (; *envp; envp++) {
    const char *entry = *envp;
    // Compare byte by byte: the short-circuit stops at the terminator of a
    // short entry, and no unaligned 32-bit load is needed.
    if (entry[0] == 'P' && entry[1] == 'W' && entry[2] == 'D' &&
        entry[3] == '=') {
      value = entry + 4;
      break;
    }
  }
  if (value == 0) {
    // PWD is not set; keep the empty default instead of dereferencing NULL.
    return;
  }

  pwd = (char *)value;
  for (; pwd[pwd_len] != 0; pwd_len++)
    ;
  printf("%s\n", pwd);
}
// Value expected in the first 32-bit word of a 64-bit Mach-O header.
const uint32_t magic64 = 0xfeedfacf;
// Value expected in the first 32-bit word of a 32-bit Mach-O header.
const uint32_t magic32 = 0xfeedface;
// Fifth argument received by the library constructor in this file.
// Only `mh` is read here: it is used as the mapped header of the main
// executable. The field order must not be changed.
struct ProgramVars {
void *mh; // mach_header or mach_header64
int *NXArgcPtr;
const char ***NXArgvPtr;
const char ***environPtr;
const char **__prognamePtr;
};
// One loaded image as recorded by the constructor in this file.
struct libcache_item {
// Mapped Mach-O header of the image.
void *header;
// Start of the image's export trie, as returned by get_export_trie().
void *trie;
// Hash of the image path, as computed by calculate_libname_hash().
uint32_t hash;
};
// Table of all loaded images plus the headers of a few well-known ones.
struct libcache {
// Array of `size` entries, one per loaded image.
struct libcache_item *libs;
// Number of entries in `libs`.
uint32_t size;
// Mapped header of the main executable.
void *main;
// Mapped header of the library that contains this code.
void *thislib;
// Mapped header of the image that contains dyld_get_sdk_version.
void *libdyld;
};
// try these hashes
// https://gist.github.com/sgsfak/9ba382a0049f6ee885f68621ae86079b
// 32-bit FNV-1a hash of a NUL-terminated string: every byte is XORed into the
// state, which is then multiplied by the FNV prime (arithmetic wraps modulo
// 2^32). Parameters: www.isthe.com/chongo/tech/comp/fnv/#FNV-param
uint32_t fnv_hash(const char *str) {
  const uint32_t prime = 0x01000193; /* 16777619 */
  uint32_t state = 0x811c9dc5;       /* 2166136261, the offset basis */
  for (const unsigned char *cur = (const unsigned char *)str; *cur != 0;
       cur++) {
    state = (state ^ *cur) * prime;
  }
  return state;
}
// Computes the cache key for a library path.
//
// name: install name / path of the library.
//   - starts with '.': the path is canonicalised with realpath() first, so
//     "./x", "../x" and friends hash like the absolute path;
//   - starts with '@': @rpath-style names cannot be resolved yet; a message
//     is printed and the unresolved name is hashed as-is;
//   - anything else is hashed as-is.
//
// Returns the hash, or 0 when name is NULL. The result is always
// well-defined: there is no uninitialised return value for '@' names and no
// NULL is hashed when realpath() fails.
uint32_t calculate_libname_hash(const char *name) {
  uint32_t (*hash_func)(const char *) = fnv_hash;
  if (name == 0) {
    return 0;
  }

  if (name[0] == '.') {
    // resolve relative path with ./ ../ ../../ and so on
    char *resolved = realpath(name, 0);
    if (resolved == 0) {
      // Path does not exist or cannot be resolved; fall back to the raw name.
      return hash_func(name);
    }
    const uint32_t hash = hash_func(resolved);
    free(resolved);
    return hash;
  }

  if (name[0] == '@') {
    // TODO: resolve @rpath
    // loop through all rpath entries, resolve each one to a full path, and
    // pick the candidate whose hash is present in the cache
    printf("resolver for @rpath is not supported yet\n");
    return hash_func(name);
  }

  return hash_func(name);
}
extern "C" uint32_t dyld_get_sdk_version(const mach_header *mh);
// Decodes one unsigned LEB128 value.
//
// addr: in/out cursor; on return it points just past the last byte of the
//       encoded value.
// ret:  receives the decoded value, truncated to 32 bits.
//
// Every byte contributes its low 7 bits, least significant group first; a
// clear top bit marks the final byte. Groups that would land at bit 32 or
// above are consumed but dropped, so an over-long encoding can never trigger
// an out-of-range shift.
void decode_uleb128(char *&addr, uint32_t *ret) {
  uint32_t result = 0;
  unsigned shift = 0;
  while (1) {
    const unsigned char byte = *(unsigned char *)(addr);
    addr++;
    if (shift < 32) {
      // Shift as unsigned 32-bit so the high groups cannot overflow an int.
      result |= (uint32_t)(byte & 0x7f) << shift;
    }
    shift += 7;
    if (!(byte & 0x80))
      break;
  }
  *ret = result;
}
// Finds the Mach-O header of the image that contains the given address.
//
// _func: any address inside the image (typically a function pointer).
// Returns the address of the nearest 0x1000-aligned word at or below _func
// that holds a Mach-O magic value.
//
// Approach 1 (abandoned, not stable): assume the text section fits in a few
// pages and probe a small fixed window below the function. It is guaranteed
// to stop, but the search range is too small.
//
// Approach 2 (used): the header is 0x1000 aligned, so step down one page at a
// time until the magic value is found. The loop has no lower bound; it relies
// on the header being mapped below the function.
//
// The scan starts at the page that contains _func, not the page above it.
// The header can never be above its own code, and starting higher could read
// an unrelated page or match the header of a neighbouring image.
void *find_header(void *_func) {
  const uint64_t page_size = 0x1000;
  const uint64_t func = (uint64_t)_func;
  uint32_t *x = (uint32_t *)(func - (func % page_size));
  while (*x != magic64 && *x != magic32) {
    x -= page_size / sizeof(uint32_t);
  }
  return (void *)x;
}
uint64_t get_slide(const void *header) {
const uint32_t magic = *(uint32_t *)header;
char *ptr = (char *)header;
if (magic == magic64) {
ptr += 0x20;
} else {
ptr += 0x20 - 0x4;
}
uint64_t slice = 0;
const uint32_t ncmds = *((uint32_t *)header + 4);
for (int i = 0; i < ncmds; i++) {
const uint32_t cmd = *((uint32_t *)ptr + 0);
const uint32_t cmdsize = *((uint32_t *)ptr + 1);
if (cmd == LC_SEGMENT_64) {
char *name = (char *)((uint64_t *)ptr + 1);
uint64_t vmaddr = *((uint64_t *)ptr + 3);
uint64_t fileoffset = *((uint64_t *)ptr + 5);
if (custom_strcmp(name, "__TEXT") == 0) {
slice = (uint64_t)header - vmaddr;
return slice;
}
}
ptr += cmdsize;
}
return 0;
}
void print_macho_summary(const void *header) {
const uint32_t magic = *(uint32_t *)header;
char *ptr = (char *)header;
if (magic == magic64) {
ptr += 0x20;
} else {
ptr += 0x20 - 0x4;
}
const uint32_t ncmds = *((uint32_t *)header + 4);
printf("parsing macho at %p\n", header);
printf("ncmds %x\n", ncmds);
for (int i = 0; i < ncmds; i++) {
const uint32_t cmd = *((uint32_t *)ptr + 0);
const uint32_t cmdsize = *((uint32_t *)ptr + 1);
printf(" cmd %x %x\n", cmd, cmdsize);
if (cmd == LC_DYLD_EXPORTS_TRIE) {
const uint32_t offset = *((uint32_t *)ptr + 2);
const uint32_t size = *((uint32_t *)ptr + 3);
printf(" export trie: offset=0x%x size=0x%x\n", offset, size);
}
if (cmd == LC_SEGMENT_64) {
char *name = (char *)((uint64_t *)ptr + 1);
uint64_t vmaddr = *((uint64_t *)ptr + 3);
uint64_t vmsize = *((uint64_t *)ptr + 4);
uint64_t fileoffset = *((uint64_t *)ptr + 5);
uint64_t filesize = *((uint64_t *)ptr + 6);
if (custom_strcmp(name, "__TEXT") == 0) {
uint64_t slide = (uint64_t)header - vmaddr;
printf(" --- slide=0x%llx ---\n", slide);
}
printf(" Segment %s\n", name);
printf(" vmaddr=0x%llx fileoffset=0x%llx\n", vmaddr, fileoffset);
printf(" vmsize=0x%llx filesize=0x%llx\n", vmsize, filesize);
}
if (cmd == LC_REEXPORT_DYLIB) {
uint32_t name_offset = *((uint32_t *)ptr + 2);
char *name = (char *)ptr + name_offset;
printf(" reexport lib %s\n", name);
}
ptr += cmdsize;
}
}
void *get_export_trie(const void *header, uint32_t &size) {
const uint32_t magic = *(uint32_t *)header;
char *ptr = (char *)header;
if (magic == magic64) {
ptr += 0x20;
} else {
ptr += 0x20 - 0x4;
}
uint64_t slice = 0;
uint64_t linkedit_vmaddr = 0;
uint64_t linkedit_fileoffset = 0;
const uint32_t ncmds = *((uint32_t *)header + 4);
for (int i = 0; i < ncmds; i++) {
const uint32_t cmd = *((uint32_t *)ptr + 0);
const uint32_t cmdsize = *((uint32_t *)ptr + 1);
if (cmd == LC_DYLD_EXPORTS_TRIE) {
const uint32_t offset = *((uint32_t *)ptr + 2);
size = *((uint32_t *)ptr + 3);
uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset;
return (void *)(linkedit_vmaddr + slice + offset_in_linkedit);
}
if (cmd == LC_DYLD_INFO_ONLY) {
const uint32_t offset = *((uint32_t *)ptr + 10);
size = *((uint32_t *)ptr + 11);
uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset;
return (void *)(linkedit_vmaddr + slice + offset_in_linkedit);
}
if (cmd == LC_SEGMENT_64) {
char *name = (char *)((uint64_t *)ptr + 1);
uint64_t vmaddr = *((uint64_t *)ptr + 3);
uint64_t fileoffset = *((uint64_t *)ptr + 5);
if (custom_strcmp(name, "__TEXT") == 0) {
slice = (uint64_t)header - vmaddr;
} else if (custom_strcmp(name, "__LINKEDIT") == 0) {
linkedit_vmaddr = vmaddr;
linkedit_fileoffset = fileoffset;
}
}
ptr += cmdsize;
}
return 0;
}
// Checks whether the edge label at `buffer` is a prefix of the part of the
// symbol that is still unmatched.
//
// buffer: in/out cursor on a NUL-terminated edge label inside the export
//         trie. It is ALWAYS advanced to just past the label's terminator,
//         match or not, so the caller can decode what follows.
// _find:  in/out cursor into the symbol being searched. It is advanced past
//         the matched characters only when the whole label matched.
//
// Returns 1 when the label is a prefix of *_find, 0 otherwise.
//
// The symbol cursor stops moving as soon as the match has failed or the
// symbol has ended, so the symbol is never read beyond its terminator even
// when the label is longer than what remains of the symbol.
uint32_t should_follow_symbol(char *&buffer, char *&_find) {
  char *find = _find;
  bool is_prefix = true;
  for (; *buffer != 0; buffer++) {
    // *find == 0 (symbol exhausted) can never equal a non-NUL label byte, so
    // a symbol shorter than the label is rejected here as well.
    if (is_prefix && *find == *buffer) {
      find++;
    } else {
      is_prefix = false;
    }
  }
  // we must always run to the end of buffer, marked 0x00
  buffer++;

  // only move forward if is_prefix
  if (is_prefix) {
    _find = find;
  }
  return is_prefix;
}
// Looks a symbol up in a Mach-O export trie.
//
// header: mapped header of the image that owns the trie; exported offsets
//         are relative to it.
// trie:   start of the export trie (root node).
// symbol: full symbol name, e.g. "_printf".
//
// Returns header + exported offset, or 0 when the symbol is not exported
// directly by this image (not present, or present only as a re-export entry)
// or when any argument is NULL.
//
// Node layout, as decoded here: uleb128 terminal-info size, the terminal info
// itself (uleb128 flags, then uleb128 offset), one byte child count, then per
// child a NUL-terminated edge label followed by the uleb128 offset of the
// child node from the start of the trie.
void *find_in_export_trie(const void *header, void *trie, const char *symbol) {
  // Bit in the terminal flags marking an entry that is re-exported from
  // another library (EXPORT_SYMBOL_FLAGS_REEXPORT in <mach-o/loader.h>); what
  // follows is then a library ordinal, not an address.
  const uint32_t flag_reexport = 0x08;

  if (header == 0 || trie == 0 || symbol == 0) {
    return 0;
  }

  char *ptr = (char *)trie;
  char *find = (char *)symbol;
  while (1) {
    // terminal node will have data
    uint32_t terminal_size = 0;
    decode_uleb128(ptr, &terminal_size);
    if (terminal_size != 0 && *find == 0) {
      // Whole symbol consumed and the node carries export info: found.
      uint32_t flags = 0;
      decode_uleb128(ptr, &flags);
      if (flags & flag_reexport) {
        return 0;
      }
      uint32_t func = 0;
      decode_uleb128(ptr, &func);
      return (void *)((char *)header + func);
    }
    // A terminal node may still have children ("_foo" and "_foo_bar" both
    // exported), so skip its payload and keep descending.
    ptr += terminal_size;

    // Read the count as unsigned: a signed char would turn counts above 127
    // into "no children".
    const uint8_t num_child = *(uint8_t *)ptr;
    ptr++;

    int still_following = 0;
    for (uint8_t i = 0; i < num_child; i++) {
      still_following = should_follow_symbol(ptr, find);
      uint32_t follow_offset;
      decode_uleb128(ptr, &follow_offset);
      if (still_following) {
        ptr = (char *)trie + follow_offset;
        break;
      }
    }
    if (!still_following) {
      // symbol not found
      return 0;
    }
  }
}
void *find_in_lib(struct libcache *cache, struct libcache_item *lib,
const char *symbol);
void *find_in_reexport(struct libcache *cache, struct libcache_item *lib,
const char *symbol) {
void *header = lib->header;
const uint32_t magic = *(uint32_t *)header;
char *ptr = (char *)header;
if (magic == magic64) {
ptr += 0x20;
} else {
ptr += 0x20 - 0x4;
}
const uint32_t ncmds = *((uint32_t *)header + 4);
for (int i = 0; i < ncmds; i++) {
const uint32_t cmd = *((uint32_t *)ptr + 0);
const uint32_t cmdsize = *((uint32_t *)ptr + 1);
if (cmd != LC_REEXPORT_DYLIB) {
ptr += cmdsize;
continue;
}
uint32_t name_offset = *((uint32_t *)ptr + 2);
char *name = (char *)ptr + name_offset;
uint32_t hash = calculate_libname_hash(name);
for (int j = 0; j < cache->size; j++) {
struct libcache_item reexport = cache->libs[j];
if (reexport.hash != hash) {
continue;
}
void *found = find_in_lib(cache, &reexport, symbol);
if (found)
return found;
}
ptr += cmdsize;
}
return 0;
}
// Resolves a symbol in one image: the image's own export trie is tried
// first; when that yields nothing, the libraries it re-exports are searched.
// Returns the resolved address, or 0 when neither search succeeds.
void *find_in_lib(struct libcache *cache, struct libcache_item *lib,
                  const char *symbol) {
  void *own_export = find_in_export_trie(lib->header, lib->trie, symbol);
  return own_export ? own_export : find_in_reexport(cache, lib, symbol);
}
void *custom_dlsym(struct libcache *cache, const char *libname,
const char *symbol) {
uint32_t hash = calculate_libname_hash(libname);
for (int i = 0; i < cache->size; i++) {
struct libcache_item cache_lib = cache->libs[i];
if (cache_lib.hash == hash) {
return find_in_lib(cache, &cache_lib, symbol);
}
}
printf("cannot find lib with hash 0x%x\n", hash);
return 0;
}
void *custom_dlsym(struct libcache *cache, uint32_t hash, const char *symbol) {
for (int i = 0; i < cache->size; i++) {
struct libcache_item cache_lib = cache->libs[i];
if (cache_lib.hash == hash) {
return find_in_lib(cache, &cache_lib, symbol);
}
}
return 0;
}
// printf replacement used to demonstrate hooking: prints a banner, the
// formatted text, then a closing banner.
//
// format, ...: same contract as printf.
// Returns what the inner formatted print returned (number of characters
// written, or a negative value on output error).
int hook_printf(const char *format, ...) {
  va_list args;
  va_start(args, format);
  printf("HOOKED BEGIN LOL\n");
  // A va_list has to go through the v* variant; handing it to printf as if
  // it were the variadic arguments is undefined behaviour.
  int status = vprintf(format, args);
  printf("HOOKED END LOL\n");
  va_end(args);
  return status;
}
// Library constructor (runs when this dylib is loaded).
//
// Steps:
//   1. record PWD from the environment;
//   2. locate the mapped headers of the main executable (from ProgramVars),
//      of this library and of the image containing dyld_get_sdk_version;
//   3. resolve the dyld image-enumeration functions through that image's
//      export trie and build a cache (header, export trie, path hash) of
//      every loaded image;
//   4. for every entry of imported_table (from the generated "out/b.h"),
//      resolve the symbol through the cache and store the address at
//      symbol.address + slide of the main executable, temporarily making the
//      target pages writable.
//
// argc/argv/apple are unused; envp feeds set_cwd(); vars->mh is the header
// of the main executable.
__attribute__((constructor)) static void
bruh(int argc, const char *const argv[], const char *const envp[],
     const char *const apple[], const struct ProgramVars *vars) {
  set_cwd(envp);

  // ProgramVars contains pointer to main executable (mapped) file
  void *main = vars->mh;
  const uint64_t main_slide = get_slide(main);
  // Find our lib (mapped) file
  void *thislib = find_header((void *)bruh);
  // Find dyld lib (mapped) file using a no-sus function
  void *libdyld = find_header((void *)dyld_get_sdk_version);
  const uint32_t libsystem_hash =
      calculate_libname_hash("/usr/lib/libSystem.B.dylib");

  // From libdyld header, we can list exports table
  // to find all function we want to use
  //
  // This way there is no leakage of functions we use to do our trick
  // mostly to hide
  // - _dyld_image_count
  // - _dyld_get_image_name
  // - _dyld_get_image_header
  // - _dyld_get_image_vmaddr_slide
  // The above functions are crucial to find all libraries loaded
  // From which we will traverse the exports table to replace
  // _got and _la_symbol_pointer data
  // Our lib can hide more details too
  // We can resolve all functions we use
  // before resolving the main executable imports
  //
  // This will make our lib use only dyld_get_sdk_version
  // For the main executable, imports are empty due to manual resolve
  printf("executable header at %p\n", main);
  printf("lib header at %p\n", thislib);
  printf("libdyld header at %p\n", libdyld);

  uint32_t trie_size;
  void *libdyld_export_trie = get_export_trie(libdyld, trie_size);

  typedef int (*dyld_image_count_t)(void);
  typedef char *(*dyld_get_image_name_t)(int);
  typedef void *(*dyld_get_image_header_t)(int);
  dyld_image_count_t dyld_image_count_func =
      (dyld_image_count_t)find_in_export_trie(libdyld, libdyld_export_trie,
                                              "__dyld_image_count");
  dyld_get_image_header_t dyld_get_image_header_func =
      (dyld_get_image_header_t)find_in_export_trie(
          libdyld, libdyld_export_trie, "__dyld_get_image_header");
  dyld_get_image_name_t dyld_get_image_name_func =
      (dyld_get_image_name_t)find_in_export_trie(libdyld, libdyld_export_trie,
                                                 "__dyld_get_image_name");
  if (!dyld_image_count_func || !dyld_get_image_header_func ||
      !dyld_get_image_name_func) {
    // Calling through a null pointer would crash the process at load time.
    printf("cannot resolve dyld image functions\n");
    return;
  }

  // The cache can only be built once the image count is known.
  const uint32_t nlib = dyld_image_count_func();
  struct libcache_item *liblist =
      (struct libcache_item *)malloc(sizeof(struct libcache_item) * nlib);
  if (liblist == 0 && nlib != 0) {
    printf("cannot allocate library cache\n");
    return;
  }
  struct libcache cache = {liblist, nlib, main, thislib, libdyld};

  for (uint32_t i = 0; i < cache.size; i++) {
    void *header = dyld_get_image_header_func(i);
    char *name = dyld_get_image_name_func(i);
    cache.libs[i].header = header;
    cache.libs[i].trie = get_export_trie(header, trie_size);
    cache.libs[i].hash = calculate_libname_hash(name);
    printf("%s %x\n", name, cache.libs[i].hash);
  }

  { // test search using a relative library name
    void *func_c =
        custom_dlsym(&cache, "./out/libb.dylib", "__Z15exported_from_cv");
    printf("Indirect search: Found=%p Expected=%p\n", func_c, 0);
  }

  // now we have function to find exported symbols
  // it supports full name search or hash search
  // to reserve space, we use the hash search
  //
  // so we will collect all imported symbols, and its offset to fix
  // with legacy symbol resolve
  //   __got always has dyld_stub_binder
  //   __la_symbol_ptr
  // with modern symbol resolve
  //   __got now contains full rebase/bind opcode
  //
  // the list of all imported symbols should be
  //   [(offset, name, libhash)]
  // if we want to also fix framework/libraries used by the main executable,
  // (only those that are not governed by the system)
  // we should also have extra list(s) for that lib to resolve ourselves
  //
  //   main: [(offset, name, libhash)]
  //   libA: [(offset, name, libhash)]
  //   libB: [(offset, name, libhash)]
  //
  // using the list is temporary for PoC
  // we know that many symbols are exported from 1 lib
  // so we can build a trie (yes, more trie)
  // where the symbols are now concatenated with libhash 4 bytes as prefix
  // and the offset is at the terminal node
  //
  // this way, we can reduce the libhash, although we need to build a trie
  // build the trie is harder than traversing it
  //
  // just an idea, if we can somehow reduce the datasize then it would be better

  // NOTES:
  // mach_task_self() has a conflicting symbol or something,
  // in symbol table it's: _mach_task_self_
  // but have to search with: _mach_task_self
  //
  // so future replacement into mach_task_self has to use _mach_task_self
  // despite the symbol is _mach_task_self_
  //
  // may need to look into why this happens so we can deal with this more
  // generic

  // this is just for now,
  // in the future we will self fix our lib so our lib also has no import
  // NOTE(review): these pointer types only approximate the real prototypes
  // of vm_protect / mach_task_self -- confirm against <mach/mach.h>.
  typedef void *(*vm_protect_t)(void *, uint64_t, uint64_t, int, int);
  typedef void *(*mach_task_self_t)();
  mach_task_self_t mach_task_self_func =
      (mach_task_self_t)custom_dlsym(&cache, libsystem_hash, "_mach_task_self");
  vm_protect_t vm_protect_func =
      (vm_protect_t)custom_dlsym(&cache, libsystem_hash, "_vm_protect");
  if (!mach_task_self_func || !vm_protect_func) {
    printf("cannot resolve vm_protect/mach_task_self\n");
    free(liblist);
    return;
  }

  const uint64_t fix_page_size = 0x4000;
  const int max_page_rw_fixed = 10; // should be dynamic, but works for now
  int npage_rw_fixed = 0;
  uint64_t page_rw_fixed[max_page_rw_fixed];

#include "out/b.h"

  for (int i = 0; i < nimports; i++) {
    struct imported_symbol symbol = imported_table[i];
    // The write below goes to the slid address, so that is the page that has
    // to become writable.
    const uint64_t fix_at = symbol.address + main_slide;

    bool need_rw_fix = true;
    for (int j = 0; j < npage_rw_fixed; j++) {
      if (page_rw_fixed[j] <= fix_at &&
          page_rw_fixed[j] + fix_page_size > fix_at) {
        need_rw_fix = false;
      }
    }
    if (need_rw_fix) {
      const uint64_t start_page = fix_at - (fix_at % fix_page_size);
      vm_protect_func(mach_task_self_func(), start_page, fix_page_size, 0,
                      VM_PROT_READ | VM_PROT_WRITE);
      // Never write past the fixed-size table; an unrecorded page simply
      // stays RW and may be re-protected for a later symbol.
      if (npage_rw_fixed < max_page_rw_fixed) {
        page_rw_fixed[npage_rw_fixed++] = start_page;
      }
      printf("modify page starts at 0x%llx to RW\n", start_page);
    }

    void *resolved;
    // search with hash is faster
    resolved = custom_dlsym(&cache, symbol.hash, symbol.name);
    if (resolved == 0) {
      // hash lookup misses for relative paths and rpath names; retry by name
      resolved = custom_dlsym(&cache, symbol.lib, symbol.name);
    }
    *(uint64_t *)fix_at = (uint64_t)resolved;
    printf("imports need to fix: (0x%x)%s at 0x%llx\n", symbol.hash,
           symbol.name, symbol.address);
    printf(" resolved %p\n", resolved);
  }

  // Put every recorded page back to read-only.
  for (int j = 0; j < npage_rw_fixed; j++) {
    const uint64_t start_page = page_rw_fixed[j];
    vm_protect_func(mach_task_self_func(), start_page, fix_page_size, 0,
                    VM_PROT_READ);
  }
  free(liblist);
}

View File

@ -1,8 +1,44 @@
# Build script for the custom loader experiment.
# -e aborts on the first failing command, -x echoes every command as it runs.
set -ex
# Value passed to -mmacosx-version-min in every compile below.
# NOTE(review): VERSION is assigned twice in a row; the second value (11) is
# the one in effect.
VERSION=14
VERSION=11
# Directory that receives every build artifact.
OUT=./out
# Selects the flow below: 1 = full PoC (placeholder), 2 = remove-imports
# test, any other value = plain test build.
LOGIC=2
mkdir -p $OUT
# NOTE(review): these two unconditional builds use b.c / a.c while every
# branch below uses b.cc / a.cc -- confirm they are still wanted.
clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared b.c
clang++ -mmacosx-version-min=$VERSION -o $OUT/a a.c -L"./out" -lb
# NOTE(review): [[ ... ]] is not POSIX sh syntax -- presumably this script is
# run with bash; confirm (no shebang is visible here).
if [[ $LOGIC -eq 1 ]]
then
# full poc flow
echo "to be continue"
# remove imports
# remove mod init
# remove symtab dysymtab
# fix link edit section
elif [[ $LOGIC -eq 2 ]]
then
# remove imports test
# libc to test reexport custom lib
clang++ -mmacosx-version-min=$VERSION -o $OUT/libc.dylib -shared c.cc
# create our dummy lib first
clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_library out/libc.dylib dummy.cc
# build a references libb
clang++ -mmacosx-version-min=$VERSION -o $OUT/a -L"./out" -lb a.cc
# extract symbols from a
# (the tool rewrites $OUT/a in place via -o; its stdout becomes the generated
# header $OUT/b.h)
../../macho-go/bin/ios-wrapper remove-imports $OUT/a -o $OUT/a > $OUT/b.h
# build libb with symbols extracted from a
clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_library out/libc.dylib b.cc
else
# dummy test build
clang++ -mmacosx-version-min=$VERSION -o $OUT/libc.dylib -shared c.cc
clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_library out/libc.dylib b.cc
clang++ -mmacosx-version-min=$VERSION -o $OUT/a -L"./out" -lb a.cc
fi

View File

@ -0,0 +1,4 @@
#include <stdio.h>
// Prints a fixed marker line; exported so that other images can call it.
void exported_from_c() {
  static const char marker[] = "from c\n";
  printf("%s", marker);
}

View File

@ -0,0 +1 @@