clean code and add comment

2024-01-04 06:34:07 +07:00
parent 7a6a41b4d8
commit 263596b1a1
2 changed files with 97 additions and 96 deletions
--- a/macho-go/pkg/ios/macho/edit.go
+++ b/macho-go/pkg/ios/macho/edit.go
@ -472,7 +472,8 @@ func (mc *MachoContext) ReworkForObjc() {
 	main_offset := int(mc.entryoff)
 	var shellcode_offset int

-	if (mc.header.cputype & 0xff) == 12 {
+	isArm := (mc.header.cputype & 0xff) == 12
+	if isArm {
 		shellcode = []uint32{
 			0x10000008,
 			0, // x9 = (offset end of __DATA) - (offset shellcode)
@ -513,37 +514,11 @@ func (mc *MachoContext) ReworkForObjc() {
 		// fmt.Printf("// movz_data_end_offset=%x\n", movz_data_end_offset)
 		fmt.Printf("// lc_main_offset=%x\n", lc_main_offset)
 	} else {
-		// TODO: fix to work with offset larger than 0xffff
-		// shellcode = []uint32{
-		// 	0x00058d4c,
-		// 	0x66000000,
-		// 	0, // offset
-		// 	0x57c8014d,
-		// 	0x41515256,
-		// 	0x088b4d50,
-		// 	0x41d1ff41,
-		// 	0x5e5a5958,
-		// 	0x488b4d5f,
-		// 	0xe1ff4108,
-		// }
-
-		shellcode_x := []uint8{
+		shellcode_start := []uint8{
 			0x4c, 0x8d, 0x05, 0x00, 0x00, 0x00, 0x00,
 			0x49, 0xC7, 0xC1,
 		}

-		offset := []uint8{0x00, 0x00, 0x00, 0x00} // offset
-		shellcode_offset = text_start - 44
-
-		encode_movz := func(v int) {
-			for i := 0; i < 4; i++ {
-				offset[i] = uint8(v >> (i * 8))
-			}
-		}
-
-		// 7 is shellcode size to get RIP
-		encode_movz((data_end - text_start) + (44 - 7))
-
 		shellcode_end := []uint8{
 			0x4d, 0x01, 0xc8,
 			0x57,
@ -560,33 +535,59 @@ func (mc *MachoContext) ReworkForObjc() {
 			0x5e,
 			0x5f, 0x4d, 0x8b, 0x48, 0x08,
 			0x41, 0xff, 0xe1,
+			// pad so the total length is a multiple of 4 bytes
+			0x00, 0x00,
 		}

-		shellcode_x = append(shellcode_x, offset...)
-		shellcode_x = append(shellcode_x, shellcode_end...)
+		offset := []uint8{0x00, 0x00, 0x00, 0x00} // offset
+		shellcode_size := len(shellcode_start) + len(offset) + len(shellcode_end)

-		for i := 0; i < len(shellcode_x)-2; i += 4 {
+		// could use buffer encoding, but for correctness,
+		// we do this by hand
+		encode_movz := func(v int) {
+			for i := 0; i < 4; i++ {
+				offset[i] = uint8(v >> (i * 8))
+			}
+		}
+
+
+//                      ┌─────────────────┐
+//                      │                 │
+// shellcode starts ────┼─────────────────┼─────                   │ │instruction
+//                      │                 │                        │ │fetch RIP size
+//      RIP returns ────┼─────────────────┼─────    ▲              │ │
+//                      │                 │         │              │
+//                      │                 │         │              │ shellcode length
+//   shellcode ends     │                 │         │ offset       │
+//           __text ────┼─────────────────┼─────    │ range   │
+//                      │                 │         │         │ __DATA ends - __text
+//                      │                 │         │         │
+//      __DATA ends ────┼─────────────────┼─────    ▼         │
+//                      │                 │
+//                      │                 │
+//                      │                 │
+//                      │                 │
+//                      │                 │
+//                      └─────────────────┘
+		encode_movz((data_end - text_start) + (shellcode_size - len(shellcode_start)))
+
+		shellcode_offset = text_start - shellcode_size
+		shellcode_bytes := append(shellcode_start, offset...)
+		shellcode_bytes = append(shellcode_bytes, shellcode_end...)
+
+		for i := 0; i < len(shellcode_bytes); i += 4 {
 			val := 0
 			// little endian
-			val |= int(shellcode_x[i+0]) << 0
-			val |= int(shellcode_x[i+1]) << 8
-			val |= int(shellcode_x[i+2]) << 16
-			val |= int(shellcode_x[i+3]) << 24
+			val |= int(shellcode_bytes[i+0]) << 0
+			val |= int(shellcode_bytes[i+1]) << 8
+			val |= int(shellcode_bytes[i+2]) << 16
+			val |= int(shellcode_bytes[i+3]) << 24
 			shellcode = append(shellcode, uint32(val))
 		}

-		shellcode = append(shellcode, 0x0000e1ff)
-
-		// shellcode[10] = movz_data_end_offset
-		// shellcode[19] = movz_main_offset
-
 		fmt.Printf("// shellcode_offset=%x\n", shellcode_offset)
 		fmt.Printf("// main_offset=%x\n", main_offset)
 		fmt.Printf("// data_end=%x\n", data_end)
-		// fmt.Printf("// movz_calculate_offset=%x\n", movz_calculate_offset)
-		// fmt.Printf("// movz_shellcode_offset=%x\n", movz_shellcode_offset)
-		// fmt.Printf("// movz_main_offset=%x\n", movz_main_offset)
-		// fmt.Printf("// movz_data_end_offset=%x\n", movz_data_end_offset)
 		fmt.Printf("// lc_main_offset=%x\n", lc_main_offset)
 	}

--- a/research/custom_loader/b.cc
+++ b/research/custom_loader/b.cc
@ -389,29 +389,12 @@ void *get_export_trie(const void *header, uint32_t &size) {
      const uint32_t offset = *((uint32_t *)ptr + 2);
      size = *((uint32_t *)ptr + 3);
      uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset;
-
-      printf("trie: %p\n",linkedit_vmaddr + slice + offset_in_linkedit );
-      if (linkedit_vmaddr + slice + offset_in_linkedit == 0x7ff888187f78) {
-        FILE* f = fopen("../scripts/libsystem_c.dylib", "wb");
-        // fwrite(header, size + linkedit_vmaddr + slice + offset_in_linkedit, 1, f);
-        // fwrite((char*)linkedit_vmaddr + slice + offset_in_linkedit, size, 1, f);
-        fclose(f);
-      }
-
      return (void *)(linkedit_vmaddr + slice + offset_in_linkedit);
    }
    if (cmd == LC_DYLD_INFO_ONLY) {
      const uint32_t offset = *((uint32_t *)ptr + 10);
      size = *((uint32_t *)ptr + 11);
      uint64_t offset_in_linkedit = (uint64_t)offset - linkedit_fileoffset;
-
-      printf("trie: %p\n",linkedit_vmaddr + slice + offset_in_linkedit );
-      if (linkedit_vmaddr + slice + offset_in_linkedit == 0x7ff888187f78) {
-        FILE* f = fopen("../scripts/libsystem_c.bin", "wb");
-        fwrite((char*)linkedit_vmaddr + slice + offset_in_linkedit, size, 1, f);
-        fclose(f);
-      }
-
      return (void *)(linkedit_vmaddr + slice + offset_in_linkedit);
    }
    if (cmd == LC_SEGMENT_64) {
@ -507,8 +490,22 @@ void *find_in_export_trie(const void *header, void *trie, char *& symbol) {
  decode_uleb128(ptr, &func);

  if (flag == 0x8 /*re-export*/) {
+    // this hits a re-export symbol but with another name
+    // usually, the re-export is the same name on another library
+    // but somehow, for system libraries, a lot of symbols are
+    // renamed and re-exported from another library
+    // probably this was to build wrappers and
+    // have custom platform optimizations
+    //
+    // an example of this is _strlen in libsystem_c.dylib, which
+    // is re-exported from __platform_strlen in libplatform
+    //
+    // The purpose of using char*& is to redirect the symbol search
+    // to another symbol cheaply, by reassigning the caller's pointer
+    //
+    // we return 0 so the dlsym continues to search,
+    // but with another symbol name because the symbol points to another string
    symbol = ptr;
-    printf("re-export %s\n", symbol);
    return 0;
  }
  return (void *)((char *)header + func);
@ -560,21 +557,34 @@ void *find_in_lib(struct libcache *cache, struct libcache_item *lib,
    return direct;
  }
  // cannot find in directly exported trie, loop through all reexport libs
-  // printf("looking for symbol: %s\n", symbol);
  return find_in_reexport(cache, lib, symbol);
 }

+// the current logic of dlsym is not correct, but it works for PoC
+//
+// dlsym searches and matches libraries based on the LC_ID_DYLIB load command
+// while for our PoC, we use the paths of libraries to search for them
+//
+// for performance reasons, we do not compare the paths as strings
+// we instead use a simple hash to carry out the comparison
+// using hashes allows us to compare integers and would be faster
 void *custom_dlsym(struct libcache *cache, uint32_t hash, const char *symbol) {
  for (size_t i = 0; i < cache->size; i++) {
    struct libcache_item *cache_lib = &cache->libs[i];
    if (cache_lib->hash == hash) {
-      // if (custom_strcmp(symbol, "_strlen") == 0) {
-      //   return find_in_lib(cache, cache_lib, "__platform_strlen");
-      // }
+      // read find_in_export_trie comments to know the use of char*&
+      //
+      // this code is for when the symbol searching references
+      // a previous item in search chain
+      //
+      // For example:
+      // searching for X in [A, B, C],
+      // C has X but it is a re-export from B with the name Y
+      // then we have to perform a search again from the top
+      // but with symbol Y
      char**  symbol_copy = (char**)&symbol;
      void* func = find_in_lib(cache, cache_lib, *symbol_copy);
      if (*symbol_copy != symbol) {
-        printf("symbol found %p %s %s\n", func, *symbol_copy, symbol);
        func = find_in_lib(cache, cache_lib, *symbol_copy);
      }
      return func;
@ -901,12 +911,12 @@ void build_cache(struct libcache &cache, void *main) {
  char* dyld_image_count_s = "__dyld_image_count";
  int (*dyld_image_count_func)(void) = (dyld_image_count_t)find_in_export_trie(
      libdyld, libdyld_export_trie, dyld_image_count_s);
-      
+
  char* dyld_get_image_header_s = "__dyld_get_image_header";
  void *(*dyld_get_image_header_func)(int) =
      (dyld_get_image_header_t)find_in_export_trie(libdyld, libdyld_export_trie,
                                                   dyld_get_image_header_s);
-                                            
+
  char* dyld_get_image_name_s = "__dyld_get_image_name";
  char *(*dyld_get_image_name_func)(int) =
      (dyld_get_image_name_t)find_in_export_trie(libdyld, libdyld_export_trie,
@ -1182,7 +1192,7 @@ void fix(struct libcache &cache) {
 void volatile custom_initializer(int argc, const char *const argv[],
                                 const char *const envp[],
                                 const char *const apple[]) {
-  printf("run custom initializers %p\n", custom_initializer_i);
+  printf("[+] run custom initializers\n");

  if (custom_initializer_i->cls != 0) {
    // for Objective-C load
@ -1251,7 +1261,7 @@ void volatile custom_initializer(int argc, const char *const argv[],
    free(custom_initializer_i->constructors);
  }

-  printf("initializers completed\n");
+  printf("[+] initializers completed\n");
  free(custom_initializer_i);
 }

@ -1452,30 +1462,18 @@ void fix_initializer(struct libcache_item *libfixing, struct libcache &cache) {
  // (note: __TEXT segment is aligned to the end of the page, free space in the
  // middle)
  //
-  // Below is the shellcode.
-  /*
-  adr x8, 0
-  # x9 = (offset end of __DATA) - (offset shellcode)
-  movz x9, #0x9999
-  add x8, x8, x9
-
-  stp x30, x8, [sp], #-0x10
-  stp x3, x2, [sp], #-0x10
-  stp x1, x0, [sp], #-0x10
-
-  # custom intializer
-  ldr x9, [x8]
-  blr x9
-
-  ldp x1, x0, [sp, #0x10]!
-  ldp x3, x2, [sp, #0x10]!
-  ldp x30, x8, [sp, #0x10]!
-
-  # original main
-  # link register is set so jump only
-  ldr x9, [x8, #8]
-  br x9
-  */
+  // The shellcode is built using the ios-wrapper tool
+  // The idea is:
+  //
+  // push main arguments
+  // r8 = shellcode location
+  // r9 = offset from shellcode to __DATA end
+  // r8 = r8 + r9 -- get __DATA end address
+  // r9 = *r8 -- the first pointer is custom_initializer
+  // call r9
+  // r9 = *(r8 + 8) -- the second pointer is main function
+  // pop main arguments
+  // jump r9 -- do not call, return to dyld

  void *header = libfixing->header;
  const uint32_t magic = *(uint32_t *)header;
@ -1542,7 +1540,9 @@ void fix_initializer(struct libcache_item *libfixing, struct libcache &cache) {
        uint64_t *dummy = (uint64_t *)(addr + slide + size);
        dummy[0] = (uint64_t)custom_initializer;
        dummy[1] = (uint64_t)(header) + bshield_data::main;
-        printf("add custom main-peg at %p\n", dummy);
+        printf("-- add custom main-peg at %p\n", dummy);
+        printf("-- custom initializer  at %llx\n", dummy[0]);
+        printf("-- main function       at %llx\n", dummy[1]);
      } else if (custom_strcmp(name, "__LINKEDIT") == 0) {
        linkedit_vmaddr = vmaddr;
        linkedit_fileoffset = fileoffset;