(** Minimal Mach-O reader: parses the header, the load commands and the
    chained-fixups blob of a binary, using ppx_bitstring patterns. *)
module Macho = struct
  (* type u32 = Unsigned.UInt32.t *)

  (* Fixed-width fields are carried in the signed boxed integer types. Note
     that [u16] is an alias of [int32], not a real 16-bit type. *)
  type u16 = int32
  type u32 = int32
  type u64 = int64

  (** One 64-bit section header of a segment. *)
  type section = {
    name : string;
    segment : string;
    addr : u64;
    size : u64;
    offset : u32;
    align : u32;
    reloff : u32;
    nreloc : u32;
    flags : u32;
  }

  (** A load command. Commands without a dedicated constructor are kept as
      [Command] with their raw id and size. *)
  type command =
    | Command of { cmd : u32; size : u32 }
    | Segment of {
        name : string;
        vmaddr : u64;
        vmsize : u64;
        fileoff : u64;
        filesize : u64;
        sections : section list;
        flags : u32;
      }
    | LinkEdit of { cmd : u32; dataoff : u32; datasize : u32 }
    | LoadDylib of { name : string }
  (* | SymbolTable of {} | DynamicSymbolTable of {} | Linker of {} | UUID of {}
     | BuildVersion of {} | SourceVersion of {} | FunctionStart of {}
     | DataInCode of {} | Main of {} | Encryption of {} *)

  (** One entry of the chained-fixups import table. *)
  type import = { lib : u32; name : string }

  (** Per-segment chain starts; [pages] holds one start offset per page. *)
  type chain = {
    size : u32;
    page_size : u16;
    pointer_format : u16;
    segment_offset : u64;
    max_valid_pointer : u32;
    pages : u16 list;
  }

  (** [u32_to_int v] reads [v] as an unsigned 32-bit quantity. Relies on a
      native [int] wider than 32 bits, as the masking it replaces did. *)
  let u32_to_int v = Int32.to_int v land 0xffffffff

  (** [c_string_from s start] is the substring of [s] beginning at [start] and
      ending before the first NUL byte, or at the end of [s] when there is no
      NUL. Returns the empty string when [start] is out of range. *)
  let c_string_from s start =
    let len = String.length s in
    if start < 0 || start >= len then ""
    else
      let stop =
        match String.index_from s start '\x00' with
        | i -> i
        | exception Not_found -> len
      in
      String.sub s start (stop - start)

  (** Prints the fields of [chain] on stdout. *)
  let print_chain chain =
    Printf.printf "chain\n";
    Printf.printf " page_size 0x%x\n" (u32_to_int chain.page_size);
    Printf.printf " pointer format 0x%x\n" (u32_to_int chain.pointer_format);
    Printf.printf " segment 0x%x\n" (Int64.to_int chain.segment_offset);
    Printf.printf " pages: [";
    List.iter
      (fun page -> Printf.printf "0x%x," (Int32.to_int page land 0xffff))
      chain.pages;
    Printf.printf "]\n"

  (** A decoded chained pointer. *)
  type fixup =
    | Bind of { ordinal : u64; addend : u64 }
    | Rebase of { target : u64; high8 : u64 }

  (** Prints one fixup on stdout. *)
  let print_fixup = function
    | Bind { ordinal; addend } ->
      Printf.printf "Bind(ordinal:%x, %x)\n" (Int64.to_int ordinal)
        (Int64.to_int addend)
    | Rebase { target; high8 } ->
      Printf.printf "Rebase(target:%x, high8:%x)\n" (Int64.to_int target)
        (Int64.to_int high8)

  (** A resolved fixup as returned by [parse_fixups]. *)
  type symbol = { typ : string; name : string; lib : string }

  (** Parsed Mach-O header together with its load commands. *)
  type t = {
    magic : u32;
    cputype : u32;
    cpusubtype : u32;
    filetype : u32;
    ncmds : u32;
    sizeofcmds : u32;
    flags : u32;
    commands : command list;
  }

  (* Load command identifiers. *)
  let lc_segment = 0x00000001l
  let lc_symtab = 0x00000002l
  let lc_symseg = 0x00000003l
  let lc_thread = 0x00000004l
  let lc_unixthread = 0x00000005l
  let lc_loadfvmlib = 0x00000006l
  let lc_idfvmlib = 0x00000007l
  let lc_ident = 0x00000008l
  let lc_fvmfile = 0x00000009l
  let lc_prepage = 0x0000000al
  let lc_dysymtab = 0x0000000bl
  let lc_load_dylib = 0x0000000cl
  let lc_id_dylib = 0x0000000dl
  let lc_load_dylinker = 0x0000000el
  let lc_id_dylinker = 0x0000000fl
  let lc_prebound_dylib = 0x00000010l
  let lc_routines = 0x00000011l
  let lc_sub_framework = 0x00000012l
  let lc_sub_umbrella = 0x00000013l
  let lc_sub_client = 0x00000014l
  let lc_sub_library = 0x00000015l
  let lc_twolevel_hints = 0x00000016l
  let lc_prebind_cksum = 0x00000017l
  let lc_load_weak_dylib = 0x80000018l
  let lc_segment_64 = 0x00000019l
  let lc_routines_64 = 0x0000001al
  let lc_uuid = 0x0000001bl
  let lc_rpath = 0x8000001cl
  let lc_code_signature = 0x0000001dl
  let lc_segment_split_info = 0x0000001el
  let lc_reexport_dylib = 0x8000001fl
  let lc_lazy_load_dylib = 0x00000020l
  let lc_encryption_info = 0x00000021l
  let lc_dyld_info = 0x00000022l
  let lc_dyld_info_only = 0x80000022l
  let lc_load_upward_dylib = 0x80000023l
  let lc_version_min_macosx = 0x00000024l
  let lc_version_min_iphoneos = 0x00000025l
  let lc_function_starts = 0x00000026l
  let lc_dyld_environment = 0x00000027l
  let lc_main = 0x80000028l
  let lc_data_in_code = 0x00000029l
  let lc_source_version = 0x0000002al
  let lc_dylib_code_sign_drs = 0x0000002bl
  let lc_encryption_info_64 = 0x0000002cl
  let lc_linker_option = 0x0000002dl
  let lc_linker_optimization_hint = 0x0000002el
  let lc_version_min_tvos = 0x0000002fl
  let lc_version_min_watchos = 0x00000030l
  let lc_note = 0x00000031l
  let lc_build_version = 0x00000032l
  let lc_dyld_exports_trie = 0x80000033l
  let lc_dyld_chained_fixups = 0x80000034l
  let lc_fileset_entry = 0x80000035l

  (* Commands that share the (cmd, cmdsize, dataoff, datasize) layout. *)
  let linkedit_cmds =
    [
      lc_dyld_exports_trie;
      lc_dyld_chained_fixups;
      lc_code_signature;
      lc_segment_split_info;
      lc_function_starts;
      lc_data_in_code;
      lc_dylib_code_sign_drs;
      lc_linker_optimization_hint;
    ]

  (* Commands that share the dylib_command layout. Every one of them takes a
     slot in the library ordinal numbering used by bind entries, so they must
     all be recorded, in file order, for ordinal lookups to line up. *)
  let dylib_load_cmds =
    [
      lc_load_dylib;
      lc_load_weak_dylib;
      lc_reexport_dylib;
      lc_load_upward_dylib;
    ]

  let is_linkedit_command cmd = List.mem cmd linkedit_cmds
  let is_dylib_load_command cmd = List.mem cmd dylib_load_cmds

  (** [parse_sections bytes] decodes consecutive 80-byte section headers until
      [bytes] no longer holds a complete one. The fixed 16-byte name fields
      are returned without their NUL padding. *)
  let rec parse_sections bytes =
    let sections : section list =
      match%bitstring bytes with
      | {| name : 16*8 : string;
           segment : 16*8 : string;
           addr : 64 : littleendian;
           size : 64 : littleendian;
           offset : 32 : littleendian;
           align : 32 : littleendian;
           reloff : 32 : littleendian;
           nreloc : 32 : littleendian;
           flags : 32 : littleendian;
           reserved : 32*3 : bitstring;
           rest : -1 : bitstring |} ->
        let name = c_string_from name 0 in
        let segment = c_string_from segment 0 in
        let this_section =
          { name; segment; addr; size; offset; align; reloff; nreloc; flags }
        in
        this_section :: parse_sections rest
      | {| _ |} -> []
    in
    sections

  (** [parse_commands input] decodes load commands from [input] until no
      further command fits. 64-bit segments, link-edit data commands and dylib
      load commands get dedicated constructors; everything else is returned as
      [Command]. *)
  let rec parse_commands input =
    (* let () = Printf.printf "%s" (Bitstring.string_of_bitstring input) in *)
    let commands : command list =
      match%bitstring input with
      | {| cmd : 32 : littleendian;
           cmdsize : 32 : littleendian;
           name : 16*8 : string;
           vmaddr : 64 : littleendian;
           vmsize : 64 : littleendian;
           fileoff : 64 : littleendian;
           filesize : 64 : littleendian;
           permission : 64 : littleendian;
           nsect : 32 : littleendian;
           flags : 32 : littleendian;
           sections_bytes : ((Int32.to_int nsect) * 10 * 64) : bitstring;
           rest : -1 : bitstring |}
        when cmd = lc_segment_64 ->
        let name = c_string_from name 0 in
        let sections = parse_sections sections_bytes in
        let this_command =
          Segment { name; vmaddr; vmsize; fileoff; filesize; sections; flags }
        in
        this_command :: parse_commands rest
      | {| cmd : 32 : littleendian;
           cmdsize : 32 : littleendian;
           dataoff : 32 : littleendian;
           datasize : 32 : littleendian;
           rest : -1 : bitstring |}
        when is_linkedit_command cmd ->
        let this_command = LinkEdit { cmd; dataoff; datasize } in
        this_command :: parse_commands rest
      | {| cmd : 32 : littleendian;
           cmdsize : 32 : littleendian;
           name_offset : 32 : littleendian;
           timestamp : 32 : littleendian;
           current_version : 32 : littleendian;
           compatibility_version : 32 : littleendian;
           name : ((Int32.to_int cmdsize)*8 - 32*6) : bitstring;
           rest : -1 : bitstring |}
        when is_dylib_load_command cmd ->
        (* The path is NUL padded up to cmdsize; keep the part before the
           padding, or all of it when no terminator is present. *)
        let name = c_string_from (Bitstring.string_of_bitstring name) 0 in
        let this_command = LoadDylib { name } in
        this_command :: parse_commands rest
      | {| cmd : 32 : littleendian;
           cmdsize : 32 : littleendian;
           data : ((Int32.to_int cmdsize)*8 - 64) : bitstring;
           rest : -1 : bitstring |} ->
        (* let () = Printf.printf "parsing command %x\n" (Int32.to_int cmdsize) in *)
        (* let () = Bitstring.hexdump_bitstring stdout data in *)
        let this_command = Command { cmd; size = cmdsize } in
        this_command :: parse_commands rest
      | {| _ |} -> []
    in
    commands

  (** Prints a one-line summary of every load command of [macho], plus the
      section names of each segment. *)
  let summary_print macho =
    let print_command = function
      | LoadDylib { name } -> Printf.printf "Load Dylib %s\n" name
      | Segment { name; sections; _ } ->
        Printf.printf "Segment %s\n" name;
        List.iter
          (fun (sec : section) -> Printf.printf " Section %s\n" sec.name)
          sections
      | LinkEdit { cmd; dataoff; datasize } ->
        Printf.printf "LinkEdit 0x%x\n" (u32_to_int cmd);
        Printf.printf " dataoff 0x%x\n" (u32_to_int dataoff);
        Printf.printf " datasize 0x%x\n" (u32_to_int datasize)
      | Command { cmd; _ } -> Printf.printf "Command 0x%x\n" (u32_to_int cmd)
    in
    List.iter print_command macho.commands

  (* Page start value meaning that the page holds no fixups. *)
  let chained_ptr_start_none = 0xFFFF

  (* Distance, in bytes, covered by one unit of the next field. *)
  let chained_ptr_64_stride = 4

  (** [decode_chained_ptr_64 value] splits one 64-bit chained pointer into its
      fixup and its next field. Bit 63 selects bind or rebase and bits 51-62
      hold next. A bind keeps the ordinal in bits 0-23 and the addend in bits
      24-31; a rebase keeps the target in bits 0-35 and high8 in bits 36-43. *)
  let decode_chained_ptr_64 value =
    let field ~shift ~mask =
      Int64.logand (Int64.shift_right_logical value shift) mask
    in
    let is_bind = Int64.equal (field ~shift:63 ~mask:1L) 1L in
    let next = Int64.to_int (field ~shift:51 ~mask:0xFFFL) in
    let fixup =
      if is_bind then
        Bind
          {
            ordinal = field ~shift:0 ~mask:0xFFFFFFL;
            addend = field ~shift:24 ~mask:0xFFL;
          }
      else
        Rebase
          {
            target = field ~shift:0 ~mask:0xFFFFFFFFFL;
            high8 = field ~shift:36 ~mask:0xFFL;
          }
    in
    (fixup, next)

  (** [parse_fixups macho input] resolves the bind fixups described by the
      chained-fixups command of [macho]. [input] must be the whole file, since
      command and segment offsets are applied to it directly.

      Returns one [symbol] per bind, in chain order. Rebases are dropped.
      Returns the empty list when [macho] does not hold exactly one
      chained-fixups command or when the fixups header cannot be decoded.

      Writes diagnostics to stdout while parsing.

      @raise Invalid_argument when a table offset or size taken from the file
      lies outside [input]. *)
  let parse_fixups macho input =
    let chained_fixups_location = function
      | LinkEdit { cmd; dataoff; datasize }
        when cmd = lc_dyld_chained_fixups ->
        Some (dataoff, datasize)
      | _ -> None
    in
    match List.filter_map chained_fixups_location macho.commands with
    | [ (dataoff, datasize) ] -> (
      let start = Int32.to_int dataoff * 8 in
      let length = Int32.to_int datasize * 8 in
      let fixups_bytes =
        Bitstring.takebits length (Bitstring.dropbits start input)
      in
      match%bitstring fixups_bytes with
      | {| version : 32 : littleendian;
           offset : 32 : littleendian;
           imports_offset : 32 : littleendian;
           symbols_offset : 32 : littleendian;
           imports_count : 32 : littleendian;
           imports_format : 32 : littleendian;
           symbols_format : 32 : littleendian;
           _ : -1 : bitstring |} ->
        (* Entries are read as 4 bytes each, whatever imports_format says. *)
        let imports_count_bits = Int32.to_int imports_count * 32 in
        let imports_offset_bits = Int32.to_int imports_offset * 8 in
        let symbols_offset_bits = Int32.to_int symbols_offset * 8 in
        let imports_bytes =
          Bitstring.takebits imports_count_bits
            (Bitstring.dropbits imports_offset_bits fixups_bytes)
        in
        let symbols_bytes =
          Bitstring.string_of_bitstring
            (Bitstring.dropbits symbols_offset_bits fixups_bytes)
        in
        let extract_symbol_name start_offset =
          c_string_from symbols_bytes start_offset
        in
        let rec parse_chains_offset offsets_bytes =
          match%bitstring offsets_bytes with
          | {| chain_offset : 32 : littleendian; rest : -1 : bitstring |} ->
            Int32.to_int chain_offset :: parse_chains_offset rest
          | {| _ |} -> []
        in
        let rec parse_pages pages_bytes =
          match%bitstring pages_bytes with
          | {| page : 16 : littleendian; rest : -1 : bitstring |} ->
            Int32.of_int page :: parse_pages rest
          | {| _ |} -> []
        in
        (* Decodes one per-segment starts record; None when it is truncated. *)
        let parse_chain chain_bytes : chain option =
          let () = Bitstring.hexdump_bitstring stdout chain_bytes in
          match%bitstring chain_bytes with
          | {| size : 32 : littleendian;
               page_size : 16 : littleendian;
               pointer_format : 16 : littleendian;
               segment_offset : 64 : littleendian;
               max_valid_pointer : 32 : littleendian;
               page_count : 16 : littleendian;
               page_start : 16 * page_count : bitstring;
               _ : -1 : bitstring |} ->
            let pages = parse_pages page_start in
            Some
              {
                size;
                page_size = Int32.of_int page_size;
                pointer_format = Int32.of_int pointer_format;
                segment_offset;
                max_valid_pointer;
                pages;
              }
          | {| _ |} -> None
        in
        let parse_fixups_chains chains =
          match%bitstring chains with
          | {| count : 32 : littleendian;
               chains_offsets_bytes : (Int32.to_int count) * 32 : bitstring;
               _ : -1 : bitstring |} ->
            (* An offset of zero marks a segment without fixups. *)
            parse_chains_offset chains_offsets_bytes
            |> List.filter (fun chain_offset -> chain_offset <> 0)
            |> List.filter_map (fun chain_offset ->
                   parse_chain
                     (Bitstring.dropbits (chain_offset * 8) chains))
          | {| _ |} -> []
        in
        let chains_bytes_len =
          (Int32.to_int imports_offset - Int32.to_int offset) * 8
        in
        let chains_bytes =
          Bitstring.takebits chains_bytes_len
            (Bitstring.dropbits (Int32.to_int offset * 8) fixups_bytes)
        in
        let chains = parse_fixups_chains chains_bytes in
        (* Each import is one little-endian 32-bit word: the library ordinal
           in bits 0-7, the weak flag in bit 8 and the name offset in bits
           9-31. Decoding the word arithmetically avoids reading the packed
           fields in the wrong bit order. *)
        let rec parse_imports entries =
          match%bitstring entries with
          | {| raw : 32 : littleendian; rest : -1 : bitstring |} ->
            let raw = u32_to_int raw in
            let lib_ordinal = raw land 0xff in
            let name_offset = raw lsr 9 in
            let name = extract_symbol_name name_offset in
            let import = { lib = Int32.of_int lib_ordinal; name } in
            import :: parse_imports rest
          | {| _ |} -> []
        in
        (* Follows one chain. The next field counts strides from the current
           pointer to the following one, and zero ends the chain. The walk
           also stops when the next pointer would lie past the end of the
           data.
           NOTE(review): pointers are always decoded with the 64-bit layout
           and a 4-byte stride; chain.pointer_format is not consulted. *)
        let rec walk_chain bytes acc =
          match%bitstring bytes with
          | {| value : 64 : littleendian; _ : -1 : bitstring |} ->
            let fixup, next = decode_chained_ptr_64 value in
            let acc = fixup :: acc in
            let stride_bits = next * chained_ptr_64_stride * 8 in
            if next = 0 || stride_bits > Bitstring.bitstring_length bytes
            then List.rev acc
            else walk_chain (Bitstring.dropbits stride_bits bytes) acc
          | {| _ |} -> List.rev acc
        in
        (* we have chains and imports table *)
        let fixups_of_chain (chain : chain) =
          Printf.printf "segment: %x\n" (Int64.to_int chain.segment_offset);
          (* NOTE(review): segment_offset is applied as a file offset here;
             confirm it does not need translating through the fileoff of the
             matching segment. *)
          let segment =
            Bitstring.dropbits (Int64.to_int chain.segment_offset * 8) input
          in
          let page_size = Int32.to_int chain.page_size in
          let fixups_of_page page_index page_start =
            let page_start = Int32.to_int page_start in
            if page_start = chained_ptr_start_none then []
            else
              (* A page start is relative to its own page. *)
              let first = (page_index * page_size) + page_start in
              walk_chain (Bitstring.dropbits (first * 8) segment) []
          in
          List.concat (List.mapi fixups_of_page chain.pages)
        in
        let fixups = List.concat (List.map fixups_of_chain chains) in
        let imports = parse_imports imports_bytes in
        let dylibs =
          List.filter_map
            (function LoadDylib { name } -> Some name | _ -> None)
            macho.commands
        in
        (* List.iter (print_fixup) fixups; *)
        Printf.printf "import len %x\n" (List.length imports);
        Printf.printf "should be import len %x\n" (Int32.to_int imports_count);
        (* Arrays give constant-time, bounds-checked ordinal lookups. *)
        let import_table = Array.of_list imports in
        let dylib_table = Array.of_list dylibs in
        let resolve_lib lib_ordinal =
          match lib_ordinal with
          | 0 | 0xfd -> "@self"
          | n when n >= 1 && n <= Array.length dylib_table ->
            dylib_table.(n - 1)
          | _ -> "?"
        in
        List.filter_map
          (function
            | Rebase _ -> None
            | Bind { ordinal; _ } ->
              let index = Int64.to_int ordinal in
              if index < Array.length import_table then
                let (import : import) = import_table.(index) in
                let lib = resolve_lib (Int32.to_int import.lib) in
                Some { typ = "bind"; name = import.name; lib }
              else Some { typ = "bind?"; name = "?"; lib = "?" })
          fixups
      | {| _ |} -> [])
    | _ -> []
end

open Macho;;

(* Path read when no command-line argument is given. *)
let default_input_path = "./research/cases/b"

(* Entry point: parses the file named by the first command-line argument, or
   the default path, then prints its load commands and runs the fixup parser.
   Fails with [Failure] when the file does not start with a known magic. *)
let () =
  Printf.printf "\n";
  let path =
    if Array.length Sys.argv > 1 then Sys.argv.(1) else default_input_path
  in
  let input = Bitstring.bitstring_of_file path in
  let macho : Macho.t =
    match%bitstring input with
    | {| magic : 32 : littleendian;
         cputype : 32 : littleendian;
         cpusubtype : 32 : littleendian;
         filetype : 32 : littleendian;
         ncmds : 32 : littleendian;
         sizeofcmds : 32 : littleendian;
         flags : 32 : littleendian;
         _ : -1 : bitstring |}
      when magic = 0xFEEDFACEl || magic = 0xFEEDFACFl ->
      (* The 64-bit header carries one extra reserved word after flags. *)
      let header_bits = if magic = 0xFEEDFACFl then 32 * 8 else 32 * 7 in
      let commands_size = Int32.to_int sizeofcmds * 8 in
      let commands_bytes =
        Bitstring.takebits commands_size (Bitstring.dropbits header_bits input)
      in
      (* let () = Bitstring.hexdump_bitstring stdout commands_bytes in *)
      let commands = Macho.parse_commands commands_bytes in
      {
        magic;
        cputype;
        cpusubtype;
        filetype;
        ncmds;
        sizeofcmds;
        flags;
        commands;
      }
    | {| _ |} -> failwith "Input file is not a Macho"
  in
  let () = Macho.summary_print macho in
  let _symbols : Macho.symbol list = Macho.parse_fixups macho input in
  (* let () = Printf.printf "Symbols\n" in
     List.iter (fun symbol -> Printf.printf " Type=%s; Name=%s; Lib=%s\n" symbol.typ symbol.name symbol.lib) _symbols *)
  ()