From ed2f09348e91c8ccd20a1947d4a5626c26e3a28e Mon Sep 17 00:00:00 2001
From: nganhkhoa
Date: Mon, 10 Jul 2023 14:14:03 +0700
Subject: [PATCH] compress the extracted information

---
Review notes (everything between the "---" line and the first "diff --git"
line is commentary and is ignored when the patch is applied):

 * cli.go, bcell2header: the error returned by ioutil.ReadFile is still
   checked (the two original checks are left untouched as context), and the
   error returned by the buffered writer's Flush is checked as well, so a
   failed read or a short write no longer passes silently.
 * cli.go, bcell2header: the generated header now starts with
   "#include <stdint.h>", because the emitted declarations use uint32_t and
   uint64_t.
 * b.cc, fix(): printf specifiers match the uint32_t arguments, the table
   cursor and symbol counter are unsigned like the values they are compared
   with, string pointers are const, and the fixed-size page_rw_fixed cache is
   no longer written past its last slot.
 * NOTE(review): this copy of the patch had lost its line structure. Leading
   indentation was reconstructed (tabs for Go, two spaces for C++ and proto)
   and the two bare "#include" context lines in b.cc are missing their header
   names; confirm both against the target tree before applying.
 * NOTE(review): the "index" lines of cli.go and b.cc still carry the blob ids
   of the original revision of this patch.

 .../internal/wrapper/action/save_imports.go   | 104 +++++++++++++-----
 macho-go/internal/wrapper/cli.go              |  96 +++++++++++-----
 macho-go/proto/macho_info.proto               |  24 +++-
 research/custom_loader/b.cc                   |  85 ++++++++------
 research/custom_loader/build.sh               |   6 +-
 5 files changed, 225 insertions(+), 90 deletions(-)

diff --git a/macho-go/internal/wrapper/action/save_imports.go b/macho-go/internal/wrapper/action/save_imports.go
index 094d47a..ae73ec9 100644
--- a/macho-go/internal/wrapper/action/save_imports.go
+++ b/macho-go/internal/wrapper/action/save_imports.go
@@ -1,7 +1,8 @@
 package action
 
 import (
-	"fmt"
+	// "fmt"
+	"sort"
 
 	// log "github.com/sirupsen/logrus"
 	. "ios-wrapper/internal/wrapper/ofile"
@@ -11,25 +12,46 @@ import (
 type saveImports struct{}
 
 func (action *saveImports) withMacho(mf *MachoFile) error {
-	calculateHash := func(name string) uint32 {
-		var h uint32 = 0x811c9dc5
-		for _, s := range name {
-			h ^= uint32(s)
-			h *= 0x01000193
-		}
-		return h
-	}
+	// calculateHash := func(name string) uint32 {
+	// 	var h uint32 = 0x811c9dc5
+	// 	for _, s := range name {
+	// 		h ^= uint32(s)
+	// 		h *= 0x01000193
+	// 	}
+	// 	return h
+	// }
 
 	mc := mf.Context()
-	symbols := []*protomodel.MachoInfo_BindSymbol{}
-	fmt.Println("struct imported_symbol {const char* name; const char* lib; uint32_t hash; int segment_i; uint64_t offset;};")
-	fmt.Println("const char* lib_to_resolve = \"main\";")
-	fmt.Println("struct imported_symbol imported_table[] = {")
-	for _, symbol := range mc.CollectBindSymbols() {
+	// symbols_storage := []*protomodel.MachoInfo_AllImportedSymbols{}
+	symbols_raw := mc.CollectBindSymbols()
+	sort.Slice(symbols_raw, func(i, j int) bool {
+		orderedByLibrary := symbols_raw[i].Dylib() < symbols_raw[j].Dylib()
+		if symbols_raw[i].Dylib() == symbols_raw[j].Dylib() {
+			orderedBySymbol := symbols_raw[i].Name() < symbols_raw[j].Name()
+			return orderedBySymbol
+		}
+		return orderedByLibrary
+	})
+
+	libs := []string{}
+	symbols := []string{}
+	tables := []*protomodel.MachoInfo_LibraryImportedSymbols{}
+
+	var current_table *protomodel.MachoInfo_LibraryImportedSymbols
+
+	current_lib := ""
+	current_symbol := ""
+
+	current_lib_idx := -1
+	current_symbol_idx := -1
+
+	// now we expect everything is sorted and easier to build strings tables
+	// this is not fully optimized, there can be repeated symbol name in different libraries
+	for _, symbol := range symbols_raw {
 		if symbol.Type() != "lazy" {
 			continue
 		}
-		dylib_hash := calculateHash(symbol.Dylib())
+		// dylib_hash := calculateHash(symbol.Dylib())
 		seg := mc.Segments()[symbol.Segment()]
 
 		var offset uint64
@@ -42,22 +64,48 @@ func (action *saveImports) withMacho(mf *MachoFile) error {
 			offset = symbol.Address() - seg.Fileoff()
 		}
 
-		fmt.Printf("{\"%s\", \"%s\", 0x%x, 0x%x, 0x%x},\n",
-			symbol.Name(), symbol.Dylib(), dylib_hash, symbol.Segment(), offset)
-
-		symbols = append(symbols,
-			&protomodel.MachoInfo_BindSymbol{
-				Name:    symbol.Name(),
-				Libname: symbol.Dylib(),
-				Libhash: dylib_hash,
-				Segment: symbol.Segment(),
-				Offset:  offset,
+		if current_lib != symbol.Dylib() {
+			current_lib_idx += len(current_lib) + 1
+			current_lib = symbol.Dylib()
+			libs = append(libs, symbol.Dylib())
+			tables = append(tables, &protomodel.MachoInfo_LibraryImportedSymbols{
+				LibIndex: uint32(current_lib_idx),
+				Nsymbols: 0,
+				Symbols:  []*protomodel.MachoInfo_SymbolTable{},
 			})
+			current_table = tables[len(tables)-1]
+		}
+		if current_symbol != symbol.Name() {
+			current_symbol_idx += len(current_symbol) + 1
+			current_symbol = symbol.Name()
+			symbols = append(symbols, symbol.Name())
+		}
+		current_table.Nsymbols += 1
+		current_table.Symbols = append(current_table.Symbols, &protomodel.MachoInfo_SymbolTable{
+			SymbolIndex:  uint32(current_symbol_idx),
+			SegmentIndex: symbol.Segment(),
+			Offset:       uint32(offset),
+		})
+		// fmt.Printf("{\"%s\", \"%s\", 0x%x, 0x%x, 0x%x},\n",
+		// 	symbol.Name(), symbol.Dylib(), symbol.Segment(), offset)
+
+		// symbols = append(symbols,
+		// 	&protomodel.MachoInfo_BindSymbol{
+		// 		Name:    symbol.Name(),
+		// 		Libname: symbol.Dylib(),
+		// 		Libhash: dylib_hash,
+		// 		Segment: symbol.Segment(),
+		// 		Offset:  offset,
+		// 	})
 	}
-	fmt.Println("};")
-	fmt.Printf("uint32_t nimports = %d;\n", len(symbols))
 
-	mf.Info().Symbols = symbols
+	mf.Info().Symbols = &protomodel.MachoInfo_AllImportedSymbols{
+		Libs:    libs,
+		Symbols: symbols,
+		Tables:  tables,
+	}
+
+	mf.Info().Main = mc.Main()
 	return nil
 }
 
diff --git a/macho-go/internal/wrapper/cli.go b/macho-go/internal/wrapper/cli.go
index 1b992ed..6e16d40 100644
--- a/macho-go/internal/wrapper/cli.go
+++ b/macho-go/internal/wrapper/cli.go
@@ -1,10 +1,11 @@
 package wrapper
 
 import (
+	"bufio"
 	"fmt"
 	"io/ioutil"
 	"os"
-	"strings"
+	// "strings"
 
 	"github.com/alecthomas/kong"
 	log "github.com/sirupsen/logrus"
@@ -194,11 +195,11 @@ func displayBcell(bfile string) {
 			)
 		}
 		fmt.Printf(" | Bind Symbols:\n")
-		for _, symbol := range info.Symbols {
-			lib := strings.Replace(symbol.Libname, "/System/Library/Frameworks/", "", 1)
-			fmt.Printf(" | %s offset=0x%x segmentID=0x%x\n", symbol.Name, symbol.Offset, symbol.Segment)
-			fmt.Printf(" | from=%s\n", lib)
-		}
+		// for _, symbol := range info.Symbols {
+		// 	lib := strings.Replace(symbol.Libname, "/System/Library/Frameworks/", "", 1)
+		// 	fmt.Printf(" | %s offset=0x%x segmentID=0x%x\n", symbol.Name, symbol.Offset, symbol.Segment)
+		// 	fmt.Printf(" | from=%s\n", lib)
+		// }
 	}
 }
 
@@ -231,33 +232,78 @@ func resolveAddresses(dwarf string, load string, addresses []string) {
 
 func bcell2header(bfile string, header string) {
 	raw_data, err := ioutil.ReadFile(bfile)
 	if err != nil {
 		log.Panic("Invalid Protobuf bcell.dat (1)")
 	}
 	data := &protomodel.BcellFile{}
 	err = proto.Unmarshal(raw_data, data)
 	if err != nil {
 		log.Panic("Invalid Protobuf bcell.dat (2)")
 	}
 
-	fmt.Printf("[+] User Config: %+v\n", data.BcellConfig)
-	for arch, info := range data.MachoInfos {
-		fmt.Printf("[+] Arch %s:\n", arch)
-		fmt.Printf(" | PointerSize : %+v\n", info.PointerSize)
-		fmt.Printf(" | Image Base : 0x%x\n", info.ImageBase)
-		fmt.Printf(" | Init Pointers:\n")
-		for _, init_ptr := range info.InitPointers {
-			fmt.Printf(
-				" | offset 0x%x => addr 0x%x\n",
-				init_ptr.Offset,
-				init_ptr.Value,
-			)
-		}
-		fmt.Printf(" | Bind Symbols:\n")
-		for _, symbol := range info.Symbols {
-			lib := strings.Replace(symbol.Libname, "/System/Library/Frameworks/", "", 1)
-			fmt.Printf(" | %s offset=0x%x segmentID=0x%x\n", symbol.Name, symbol.Offset, symbol.Segment)
-			fmt.Printf(" | from=%s\n", lib)
-		}
+	f, err := os.Create(header)
+	if err != nil {
+		log.Panic("Cannot open header file for writing")
 	}
+	defer f.Close()
+
+	w := bufio.NewWriter(f)
+
+	// fmt.Printf("[+] User Config: %+v\n", data.BcellConfig)
+	fmt.Fprintf(w, "#include <stdint.h>\n")
+	fmt.Fprintf(w, "namespace bshield_data{\n")
+	for arch, info := range data.MachoInfos {
+		fmt.Fprintf(w, "const char* arch = \"%s\";\n", arch)
+		fmt.Fprintf(w, "unsigned int pointer_size = %d;\n", info.PointerSize)
+		fmt.Fprintf(w, "uint64_t image_base = 0x%x;\n", info.ImageBase)
+		fmt.Fprintf(w, "uint64_t main = 0x%x;\n", info.Main)
+
+		fmt.Fprintf(w, "struct init_pointer {uint64_t offset; uint64_t value;};\n")
+		fmt.Fprintf(w, "int num_init_pointers = %d;\n", len(info.InitPointers))
+		fmt.Fprintf(w, "struct init_pointer init_pointers_offsets[] = {\n")
+		for _, init_ptr := range info.InitPointers {
+			fmt.Fprintf(w, " {0x%x, 0x%x},\n", init_ptr.Offset, init_ptr.Value)
+		}
+		fmt.Fprintf(w, "};\n")
+
+		fmt.Fprintf(w, "__attribute__((section(\"__DATA,.bshield_lib\")))\n")
+		fmt.Fprintf(w, "char libs[] =\n")
+		for _, lib := range info.Symbols.Libs {
+			fmt.Fprintf(w, " \"%s\\0\"\n", lib)
+		}
+		fmt.Fprintf(w, ";\n")
+
+		fmt.Fprintf(w, "__attribute__((section(\"__DATA,.bshield_sym\")))\n")
+		fmt.Fprintf(w, "char symbols[] =\n")
+		for _, symbol := range info.Symbols.Symbols {
+			fmt.Fprintf(w, " \"%s\\0\"\n", symbol)
+		}
+		fmt.Fprintf(w, ";\n")
+
+		fmt.Fprintf(w, "// very compact symbol table,\n")
+		fmt.Fprintf(w, "// [lib idx/*4 bytes*/, nsymbol/*4 byte*/]\n")
+		fmt.Fprintf(w, "// repeate nsymbol times [name offset/*3 bytes*/, segment idx/**/, offset /*4 btyes*/]\n")
+		fmt.Fprintf(w, "// name offset is 3 bytes because we don't think we should have a table size > 2^(3 * 8)\n")
+
+		fmt.Fprintf(w, "__attribute__((section(\"__DATA,.bshield_code\")))\n")
+		fmt.Fprintf(w, "uint32_t encoded_table[] = {\n")
+		n_instructions := 0
+		for i, table := range info.Symbols.Tables {
+			fmt.Fprintf(w, " // %s\n", info.Symbols.Libs[i])
+			fmt.Fprintf(w, " %d/*lib offset*/,\n", table.LibIndex)
+			fmt.Fprintf(w, " %d/*nsymbols*/,\n", table.Nsymbols)
+			n_instructions += 2
+			for _, symbol := range table.Symbols {
+				fmt.Fprintf(w, " %d, 0x%x,\n", (symbol.SymbolIndex<<8)|symbol.SegmentIndex, symbol.Offset)
+				n_instructions += 2
+			}
+			fmt.Fprintf(w, "\n")
+		}
+		fmt.Fprintf(w, "};\n")
+		fmt.Fprintf(w, "uint32_t n_instructions = %d;\n", n_instructions)
+	}
+	fmt.Fprintf(w, "}// namespace bshield_data\n")
+	if err := w.Flush(); err != nil {
+		log.Panic("Cannot write header file")
+	}
 }
diff --git a/macho-go/proto/macho_info.proto b/macho-go/proto/macho_info.proto
index f46e33c..a2aecdb 100644
--- a/macho-go/proto/macho_info.proto
+++ b/macho-go/proto/macho_info.proto
@@ -27,8 +27,28 @@ message MachoInfo {
     uint64 offset = 5; // offset in segment
   }
 
+  message SymbolTable {
+    uint32 symbolIndex = 1;
+    uint32 segmentIndex = 2;
+    uint32 offset = 3;
+  }
+  message LibraryImportedSymbols {
+    uint32 libIndex = 1;
+    uint32 nsymbols = 2;
+    repeated SymbolTable symbols = 3;
+  }
+
+  message AllImportedSymbols {
+    repeated string libs = 1;
+    repeated string symbols = 2;
+    repeated LibraryImportedSymbols tables = 3;
+  }
+
   PointerSize pointer_size = 1;
   uint64 image_base = 2;
-  repeated InitPointer init_pointers = 3;
-  repeated BindSymbol symbols = 4;
+  uint64 main = 3;
+  repeated InitPointer init_pointers = 4;
+  // repeated BindSymbol symbols = 5;
+  AllImportedSymbols symbols = 5;
+
 }
diff --git a/research/custom_loader/b.cc b/research/custom_loader/b.cc
index 435e7cd..0f9cd6f 100644
--- a/research/custom_loader/b.cc
+++ b/research/custom_loader/b.cc
@@ -4,6 +4,8 @@
 #include
 #include
 
+#include "out/b.h"
+
 char *pwd;
 uint32_t pwd_len;
 
@@ -927,46 +929,63 @@ void fix(struct libcache& cache) {
   int npage_rw_fixed = 0;
   uint64_t page_rw_fixed[10]; // should be dynamic, but works for now
 
-#include "out/b.h"
   // think of a way to get what binary to fix
   // so we can iterate through them
-  if (nimports == 0) {
-    printf("there is no imports to fix\n");
-  }
+  const char* lib_to_resolve = "main";
   struct libcache_item* libfixing = get_libcache_with_name(&cache, lib_to_resolve);
 
-  for (int i = 0; i < nimports; i++) {
-    struct imported_symbol symbol = imported_table[i];
-    uint64_t fix_at = symbol.offset + libfixing->segment[symbol.segment_i];
-    int need_rw_fix = true;
-    for (int j = 0; j < npage_rw_fixed; j++) {
-      if (page_rw_fixed[j] <= fix_at &&
-          page_rw_fixed[j] + 0x1000 > fix_at) {
-        need_rw_fix = false;
+  uint32_t pc = 0;
+  while (pc < bshield_data::n_instructions) {
+    uint32_t libidx = bshield_data::encoded_table[pc];
+    uint32_t nsym = bshield_data::encoded_table[pc + 1];
+    pc += 2;
+
+    const char* lib = bshield_data::libs + libidx;
+    for (uint32_t i = 0; i < nsym; i++) {
+      uint32_t op = bshield_data::encoded_table[pc];
+      uint32_t offset = bshield_data::encoded_table[pc + 1];
+      pc += 2;
+
+      uint32_t symidx = op >> 8;
+      uint32_t segment = op & 0xff;
+      const char* sym = bshield_data::symbols + symidx;
+
+      uint64_t fix_at = offset + libfixing->segment[segment];
+
+      // enable WRITE protection for this data segment
+      int need_rw_fix = true;
+      for (int j = 0; j < npage_rw_fixed; j++) {
+        if (page_rw_fixed[j] <= fix_at &&
+            page_rw_fixed[j] + 0x1000 > fix_at) {
+          need_rw_fix = false;
+        }
+      }
+      if (need_rw_fix) {
+        uint64_t start_page = fix_at - (fix_at % 0x1000);
+        vm_protect_func(mach_task_self_func(), start_page, 0x1000, 0,
+                        VM_PROT_READ | VM_PROT_WRITE);
+        // Remember the page only while the fixed-size cache has room; a page
+        // that is not remembered is simply re-protected on its next use.
+        if (npage_rw_fixed < (int)(sizeof(page_rw_fixed) / sizeof(page_rw_fixed[0]))) {
+          page_rw_fixed[npage_rw_fixed++] = start_page;
+        }
+        printf("modify page starts at 0x%llx to RW\n", start_page);
       }
-    }
-    if (need_rw_fix) {
-      uint64_t start_page = fix_at - (fix_at % 0x1000);
-      vm_protect_func(mach_task_self_func(), start_page, 0x1000, 0,
-                      VM_PROT_READ | VM_PROT_WRITE);
-      page_rw_fixed[npage_rw_fixed++] = start_page;
-      printf("modify page starts at 0x%llx to RW\n", start_page);
-    }
 
-    void *resolved;
-    // search with hash is faster
-    resolved = custom_dlsym(&cache, symbol.hash, symbol.name);
-    if (resolved == 0) {
-      // but fuck apple they have relative path and rpath
-      resolved = custom_dlsym(&cache, symbol.lib, symbol.name);
-    }
-    *(uint64_t *)fix_at = (uint64_t)resolved;
+      void *resolved = 0;
+      // search with hash is faster
+      // resolved = custom_dlsym(&cache, symbol.hash, symbol.name);
+      if (resolved == 0) {
+        // but fuck apple they have relative path and rpath
+        resolved = custom_dlsym(&cache, lib, sym);
+      }
+      *(uint64_t *)fix_at = (uint64_t)resolved;
 
-    printf("imports need to fix: (0x%x)%s at 0x%llx\n", symbol.hash,
-           symbol.name, fix_at);
-    printf(" from=%s\n", symbol.lib);
-    printf(" segment id=%d; offset=0x%llx;", symbol.segment_i, symbol.offset);
-    printf(" resolved=%llx(%p)\n", *(uint64_t*)fix_at, resolved);
+      printf("imports need to fix: %s at 0x%llx\n", sym, fix_at);
+      printf(" from=%s\n", lib);
+      printf(" segment id=%u; offset=0x%x;", segment, offset);
+      printf(" resolved=%llx(%p)\n", *(uint64_t*)fix_at, resolved);
+    }
   }
 
   // TODO: Reformat the region as per before, or leave as it
diff --git a/research/custom_loader/build.sh b/research/custom_loader/build.sh
index f7dc694..4ef3bb6 100755
--- a/research/custom_loader/build.sh
+++ b/research/custom_loader/build.sh
@@ -42,7 +42,8 @@ clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_l
 clang++ -mmacosx-version-min=$VERSION -o $OUT/a -L"./out" -lb a.cc
 
 # extract symbols from a
-../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports --remove-symbol-table $OUT/a > $OUT/b.h
+../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports --remove-symbol-table $OUT/a
+../../macho-go/bin/ios-wrapper bcell2header -b $OUT/b.bcell -o $OUT/b.h
 
 # build libb with symbols extracted from a
 clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_library out/libc.dylib b.cc
@@ -62,7 +63,8 @@ clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_l
 clang -fobjc-arc -ObjC -mmacosx-version-min=$VERSION -o $OUT/a -L"./out" -lb a.mm
 
 # extract symbols from a
-../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports --remove-symbol-table $OUT/a > $OUT/b.h
+../../macho-go/bin/ios-wrapper pepe -o $OUT/a-fixed -b $OUT/b.bcell --remove-imports --remove-exports --remove-symbol-table $OUT/a
+../../macho-go/bin/ios-wrapper bcell2header -b $OUT/b.bcell -o $OUT/b.h
 
 # build libb with symbols extracted from a
 clang++ -mmacosx-version-min=$VERSION -o $OUT/libb.dylib -shared -Wl,-reexport_library out/libc.dylib b.cc