(* Source listing: 486 lines, 18 KiB, OCaml. *)
module Macho = struct
|
|
(* type u32 = Unsigned.UInt32.t *)
|
|
(* Fixed-width integer aliases used by the record types below. They are all
   backed by the signed boxed integers of the standard library, so a value
   with its top bit set reads as negative until it is masked. *)

(** 16-bit quantity, widened into an [int32]. *)
type u16 = int32

(** 32-bit quantity. *)
type u32 = int32

(** 64-bit quantity. *)
type u64 = int64
|
|
|
|
(** One section record of a 64-bit segment, as produced by [parse_sections].
    The three reserved 32-bit words that follow [flags] in the input are
    read but not stored. *)
type section = {
  name : string;  (** section name, read from a 16-byte field *)
  segment : string;  (** owning segment name, read from a 16-byte field *)
  addr : u64;
  size : u64;
  offset : u32;
  align : u32;
  reloff : u32;
  nreloc : u32;
  flags : u32;
}
|
|
|
|
(** A decoded load command. Only the kinds that [parse_commands] knows how
    to take apart have their own constructor; every other command is kept
    as a bare [Command]. *)
type command =
  | Command of { cmd : u32; size : u32 }
      (** Fallback: the raw command id and its declared size. *)
  | Segment of {
      name : string;
      vmaddr : u64;
      vmsize : u64;
      fileoff : u64;
      filesize : u64;
      sections : section list;
      flags : u32;
    }  (** A 64-bit segment together with its section records. *)
  | LinkEdit of { cmd : u32; dataoff : u32; datasize : u32 }
      (** A command that only points at a blob: offset and size. *)
  | LoadDylib of { name : string }
      (** A dylib load command; only the library path is kept. *)
(* Constructors sketched by the author but not modelled yet:
   | SymbolTable of {}
   | DynamicSymbolTable of {}
   | Linker of {}
   | UUID of {}
   | BuildVersion of {}
   | SourceVersion of {}
   | FunctionStart of {}
   | DataInCode of {}
   | Main of {}
   | Encryption of {} *)
|
|
|
|
(** One entry of the chained-fixups import table: the library ordinal and
    the symbol name looked up in the symbol string pool. *)
type import = {
  lib : u32;
  name : string;
}
|
|
(** Per-segment chain description built inside [parse_fixups]. The 16-bit
    header fields are widened to [u16] (an [int32]); [pages] holds the
    page-start values in the order they were read. *)
type chain = {
  size : u32;
  page_size : u16;
  pointer_format : u16;
  segment_offset : u64;
  max_valid_pointer : u32;
  pages : u16 list;
}
|
|
|
|
(** [print_chain chain] writes a human-readable dump of [chain] to stdout:
    page size, pointer format, segment offset and the list of page starts,
    all in hexadecimal. *)
let print_chain chain =
  let { page_size; pointer_format; segment_offset; pages; _ } = chain in
  (* Mask after the conversion so a set top bit does not print as a
     sign-extended value. *)
  let low32 word = Int32.to_int word land 0xffffffff in
  let show_page start = Printf.printf "0x%x," (Int32.to_int start land 0xffff) in
  Printf.printf "chain\n";
  Printf.printf " page_size 0x%x\n" (low32 page_size);
  Printf.printf " pointer format 0x%x\n" (low32 pointer_format);
  Printf.printf " segment 0x%x\n" (Int64.to_int segment_offset);
  Printf.printf " pages: [";
  List.iter show_page pages;
  Printf.printf "]\n"
|
|
|
|
(** A single decoded 64-bit chained pointer, as classified by the chain
    walk in [parse_fixups]. *)
type fixup =
  | Bind of {
      ordinal : u64;  (** index into the import table *)
      addend : u64;
    }
  | Rebase of {
      target : u64;
      high8 : u64;
    }
|
|
|
|
(** [print_fixup fixup] writes one line describing [fixup] to stdout, with
    every field in hexadecimal.

    The [Rebase] case used to be an empty placeholder that printed nothing;
    it now reports its fields the same way [Bind] does. *)
let print_fixup = function
  | Bind { ordinal; addend } ->
      Printf.printf "Bind(ordinal:%x, %x)\n" (Int64.to_int ordinal)
        (Int64.to_int addend)
  | Rebase { target; high8 } ->
      Printf.printf "Rebase(target:%x, high8:%x)\n" (Int64.to_int target)
        (Int64.to_int high8)
|
|
|
|
(** Result row returned by [parse_fixups]: what kind of fixup it is, the
    symbol it names and the library the symbol is expected to come from. *)
type symbol = {
  typ : string;
  name : string;
  lib : string;
}
|
|
|
|
(** A parsed file: the seven 32-bit header words in the order they are
    read, followed by the decoded load commands. *)
type t = {
  magic : u32;
  cputype : u32;
  cpusubtype : u32;
  filetype : u32;
  ncmds : u32;
  sizeofcmds : u32;
  flags : u32;
  commands : command list;
}
|
|
|
|
(* Load-command identifiers, as [int32] so they compare directly against a
   32-bit field read from the file. Identifiers written 0x8000_00xx carry
   the high bit (LC_REQ_DYLD in Apple's loader.h) and are therefore
   negative as OCaml [int32] values. *)

let lc_segment = 0x01l
let lc_symtab = 0x02l
let lc_symseg = 0x03l
let lc_thread = 0x04l
let lc_unixthread = 0x05l
let lc_loadfvmlib = 0x06l
let lc_idfvmlib = 0x07l
let lc_ident = 0x08l
let lc_fvmfile = 0x09l
let lc_prepage = 0x0al
let lc_dysymtab = 0x0bl
let lc_load_dylib = 0x0cl
let lc_id_dylib = 0x0dl
let lc_load_dylinker = 0x0el
let lc_id_dylinker = 0x0fl
let lc_prebound_dylib = 0x10l
let lc_routines = 0x11l
let lc_sub_framework = 0x12l
let lc_sub_umbrella = 0x13l
let lc_sub_client = 0x14l
let lc_sub_library = 0x15l
let lc_twolevel_hints = 0x16l
let lc_prebind_cksum = 0x17l
let lc_load_weak_dylib = 0x8000_0018l
let lc_segment_64 = 0x19l
let lc_routines_64 = 0x1al
let lc_uuid = 0x1bl
let lc_rpath = 0x8000_001cl
let lc_code_signature = 0x1dl
let lc_segment_split_info = 0x1el
let lc_reexport_dylib = 0x8000_001fl
let lc_lazy_load_dylib = 0x20l
let lc_encryption_info = 0x21l
let lc_dyld_info = 0x22l
let lc_dyld_info_only = 0x8000_0022l
let lc_load_upward_dylib = 0x8000_0023l
let lc_version_min_macosx = 0x24l
let lc_version_min_iphoneos = 0x25l
let lc_function_starts = 0x26l
let lc_dyld_environment = 0x27l
let lc_main = 0x8000_0028l
let lc_data_in_code = 0x29l
let lc_source_version = 0x2al
let lc_dylib_code_sign_drs = 0x2bl
let lc_encryption_info_64 = 0x2cl
let lc_linker_option = 0x2dl
let lc_linker_optimization_hint = 0x2el
let lc_version_min_tvos = 0x2fl
let lc_version_min_watchos = 0x30l
let lc_note = 0x31l
let lc_build_version = 0x32l
let lc_dyld_exports_trie = 0x8000_0033l
let lc_dyld_chained_fixups = 0x8000_0034l
let lc_fileset_entry = 0x8000_0035l
|
|
|
|
(** [parse_sections bytes] decodes consecutive 80-byte section records from
    [bytes] until fewer than 80 bytes remain, and returns them in input
    order.

    The two 16-byte name fields are cut at their first NUL byte, so the
    padding of the fixed-width field no longer ends up in [name] and
    [segment]; a name that fills all 16 bytes is kept whole. The three
    reserved words after [flags] are skipped. *)
let rec parse_sections bytes : section list =
  (* Fixed-width name fields are NUL-padded; keep the text before the pad. *)
  let until_nul field =
    match String.index_opt field '\x00' with
    | Some stop -> String.sub field 0 stop
    | None -> field
  in
  match%bitstring bytes with
  | {| name : 16*8 : string; segment : 16*8 : string;
       addr : 64 : littleendian; size : 64 : littleendian;
       offset : 32 : littleendian; align : 32 : littleendian;
       reloff : 32 : littleendian; nreloc : 32 : littleendian;
       flags : 32 : littleendian;
       _reserved : 32*3 : bitstring;
       rest : -1 : bitstring
    |} ->
      let name = until_nul name in
      let segment = until_nul segment in
      { name; segment; addr; size; offset; align; reloff; nreloc; flags }
      :: parse_sections rest
  | {|_|} -> []
|
|
|
|
(** [parse_commands input] decodes the load commands stored back to back in
    [input] and returns them in file order. Decoding stops, without error,
    at the first position where not even a command header fits.

    Recognised kinds:
    - [lc_segment_64] becomes [Segment], with its sections decoded by
      [parse_sections] and its name cut at the first NUL of the 16-byte
      field;
    - the link-edit data commands listed in [is_linkedit_command] become
      [LinkEdit];
    - every dylib-loading command (regular, weak, re-export, upward)
      becomes [LoadDylib]. Library ordinals in the fixup tables count all
      of these, so they have to be listed together for the ordinals to
      line up;
    - anything else becomes [Command] and is skipped using its declared
      size. *)
let rec parse_commands input : command list =
  let is_linkedit_command cmd =
    List.mem cmd
      [
        lc_dyld_exports_trie;
        lc_dyld_chained_fixups;
        lc_code_signature;
        lc_segment_split_info;
        lc_function_starts;
        lc_data_in_code;
        lc_dylib_code_sign_drs;
        lc_linker_optimization_hint;
      ]
  in
  let is_dylib_command cmd =
    List.mem cmd
      [
        lc_load_dylib;
        lc_load_weak_dylib;
        lc_reexport_dylib;
        lc_load_upward_dylib;
      ]
  in
  (* Text up to the first NUL, or the whole string when there is none
     (String.index_from would raise Not_found in that case). *)
  let until_nul text =
    match String.index_opt text '\x00' with
    | Some stop -> String.sub text 0 stop
    | None -> text
  in
  match%bitstring input with
  | {| cmd : 32 : littleendian; _cmdsize : 32 : littleendian;
       name : 16*8 : string;
       vmaddr : 64 : littleendian; vmsize : 64 : littleendian;
       fileoff : 64 : littleendian; filesize : 64 : littleendian;
       _permission : 64 : littleendian;
       nsect : 32 : littleendian; flags : 32 : littleendian;
       sections_bytes : ((Int32.to_int nsect) * 10 * 64) : bitstring;
       rest : -1 : bitstring
    |}
    when cmd = lc_segment_64 ->
      (* Each section record is 10 * 64 bits = 80 bytes. *)
      let name = until_nul name in
      let sections = parse_sections sections_bytes in
      Segment { name; vmaddr; vmsize; fileoff; filesize; sections; flags }
      :: parse_commands rest
  | {| cmd : 32 : littleendian; _cmdsize : 32 : littleendian;
       dataoff : 32 : littleendian; datasize : 32 : littleendian;
       rest : -1 : bitstring
    |}
    when is_linkedit_command cmd ->
      LinkEdit { cmd; dataoff; datasize } :: parse_commands rest
  | {| cmd : 32 : littleendian; cmdsize : 32 : littleendian;
       _name_offset : 32 : littleendian; _timestamp : 32 : littleendian;
       _current_version : 32 : littleendian;
       _compatibility_version : 32 : littleendian;
       name_bytes : ((Int32.to_int cmdsize)*8 - 32*6) : bitstring;
       rest : -1 : bitstring
    |}
    when is_dylib_command cmd ->
      (* The path is assumed to start right after the six header words;
         the recorded name offset is not consulted. *)
      let name = until_nul (Bitstring.string_of_bitstring name_bytes) in
      LoadDylib { name } :: parse_commands rest
  | {| cmd : 32 : littleendian;
       cmdsize : 32 : littleendian;
       _data : ((Int32.to_int cmdsize)*8 - 64) : bitstring;
       rest : -1 : bitstring
    |} ->
      Command { cmd; size = cmdsize } :: parse_commands rest
  | {|_|} -> []
|
|
|
|
(** [summary_print macho] prints one line per load command of [macho] to
    stdout: dylib paths, segment names followed by their section names,
    the id, offset and size of link-edit commands, and the bare id of
    everything else. Numbers are shown in hexadecimal. *)
let summary_print macho =
  (* Mask after the conversion so ids with the top bit set print as
     unsigned 32-bit values. *)
  let hex32 word = Int32.to_int word land 0xffffffff in
  let show_section (sec : section) = Printf.printf " Section %s\n" sec.name in
  let show_command = function
    | LoadDylib { name } -> Printf.printf "Load Dylib %s\n" name
    | Segment { name; sections; _ } ->
        Printf.printf "Segment %s\n" name;
        List.iter show_section sections
    | LinkEdit { cmd; dataoff; datasize } ->
        Printf.printf "LinkEdit 0x%x\n" (hex32 cmd);
        Printf.printf " dataoff 0x%x\n" (hex32 dataoff);
        Printf.printf " datasize 0x%x\n" (hex32 datasize)
    | Command { cmd; _ } -> Printf.printf "Command 0x%x\n" (hex32 cmd)
  in
  List.iter show_command macho.commands
|
|
|
|
(** [parse_fixups macho input] walks the chained fixups of the file whose
    full contents are [input] and returns one [symbol] per bind entry, in
    walk order. Rebase entries are decoded but not returned.

    Returns [[]] when [macho] does not contain exactly one chained-fixups
    command, or when the fixups header cannot be read.

    Each returned row has [typ = "bind"] with the import name and library,
    or [typ = "bind?"] with ["?"] fields when the bind ordinal has no
    import entry. A library ordinal that matches no loaded dylib, or a name
    offset outside the symbol pool, is reported as ["?"] instead of
    raising.

    Side effects: prints debugging output to stdout (a hex dump of every
    per-segment chain record, the segment offsets, and the import counts).

    Limitations, as far as this code shows: only 4-byte import entries and
    the plain 64-bit pointer layout (4-byte [next] stride) are decoded.
    NOTE(review): [segment_offset] is used directly as a file offset;
    confirm that this holds for the inputs of interest. *)
let parse_fixups macho input =
  (* Payload location of a chained-fixups command, if [cmd] is one. *)
  let chained_fixups_location = function
    | LinkEdit { cmd; dataoff; datasize } when cmd = lc_dyld_chained_fixups ->
        Some (dataoff, datasize)
    | LinkEdit _ | Command _ | Segment _ | LoadDylib _ -> None
  in
  match List.filter_map chained_fixups_location macho.commands with
  | [] | _ :: _ :: _ -> []
  | [ (dataoff, datasize) ] -> (
      let start = Int32.to_int dataoff * 8 in
      let length = Int32.to_int datasize * 8 in
      let fixups_bytes =
        Bitstring.takebits length (Bitstring.dropbits start input)
      in
      match%bitstring fixups_bytes with
      | {| _version : 32 : littleendian;
           offset : 32 : littleendian;
           imports_offset : 32 : littleendian;
           symbols_offset : 32 : littleendian;
           imports_count : 32 : littleendian;
           _imports_format : 32 : littleendian;
           _symbols_format : 32 : littleendian
        |} ->
          let imports_count_bits = Int32.to_int imports_count * 32 in
          let imports_offset_bits = Int32.to_int imports_offset * 8 in
          let symbols_offset_bits = Int32.to_int symbols_offset * 8 in
          let imports_bytes =
            Bitstring.takebits imports_count_bits
              (Bitstring.dropbits imports_offset_bits fixups_bytes)
          in
          let symbols_bytes =
            Bitstring.string_of_bitstring
              (Bitstring.dropbits symbols_offset_bits fixups_bytes)
          in
          (* NUL-terminated name starting at [start_offset] in the symbol
             pool; runs to the end of the pool if the terminator is
             missing. *)
          let extract_symbol_name start_offset =
            let pool_len = String.length symbols_bytes in
            if start_offset < 0 || start_offset >= pool_len then "?"
            else
              let stop =
                match
                  String.index_from_opt symbols_bytes start_offset '\x00'
                with
                | Some i -> i
                | None -> pool_len
              in
              String.sub symbols_bytes start_offset (stop - start_offset)
          in
          (* Starts-in-image table: a count followed by one offset per
             segment; a zero offset means the segment has no fixups. *)
          let parse_fixups_chains chains =
            match%bitstring chains with
            | {| count : 32 : littleendian;
                 chains_offsets_bytes : (Int32.to_int count)*32 : bitstring;
                 _ : -1 : bitstring
              |} ->
                let rec parse_chains_offset offsets_bytes =
                  match%bitstring offsets_bytes with
                  | {| chain_offset : 32 : littleendian;
                       rest : -1 : bitstring
                    |} ->
                      Int32.to_int chain_offset :: parse_chains_offset rest
                  | {|_|} -> []
                in
                let chains_offset =
                  List.filter
                    (fun chain_offset -> chain_offset <> 0)
                    (parse_chains_offset chains_offsets_bytes)
                in
                (* One per-segment record; [None] when it is truncated. *)
                let parse_chain chain =
                  let () = Bitstring.hexdump_bitstring stdout chain in
                  match%bitstring chain with
                  | {| size : 32 : littleendian;
                       page_size : 16 : littleendian;
                       pointer_format : 16 : littleendian;
                       segment_offset : 64 : littleendian;
                       max_valid_pointer : 32 : littleendian;
                       page_count : 16 : littleendian;
                       page_start : 16 * page_count : bitstring;
                       _ : -1 : bitstring
                    |} ->
                      let rec parse_pages pages_bytes =
                        match%bitstring pages_bytes with
                        | {| page : 16 : littleendian;
                             rest : -1 : bitstring
                          |} ->
                            Int32.of_int page :: parse_pages rest
                        | {|_|} -> []
                      in
                      let parsed : chain =
                        {
                          size;
                          page_size = Int32.of_int page_size;
                          pointer_format = Int32.of_int pointer_format;
                          segment_offset;
                          max_valid_pointer;
                          pages = parse_pages page_start;
                        }
                      in
                      Some parsed
                  | {|_|} -> None
                in
                List.filter_map
                  (fun chain_offset ->
                    parse_chain (Bitstring.dropbits (chain_offset * 8) chains))
                  chains_offset
            | {|_|} -> []
          in
          let chains_bytes_len =
            (Int32.to_int imports_offset - Int32.to_int offset) * 8
          in
          let chains_bytes =
            Bitstring.takebits chains_bytes_len
              (Bitstring.dropbits (Int32.to_int offset * 8) fixups_bytes)
          in
          let chains = parse_fixups_chains chains_bytes in
          (* Each import is one little-endian 32-bit word packed from the
             low bit up: lib_ordinal (8 bits), weak_import (1 bit),
             name_offset (23 bits). Decoding the word with shifts avoids
             reading the bitfield MSB-first, which only happened to work
             for small name offsets. *)
          let rec parse_imports bytes =
            match%bitstring bytes with
            | {| raw : 32 : littleendian; rest : -1 : bitstring |} ->
                let lib = Int32.logand raw 0xFFl in
                let name_offset =
                  Int32.to_int (Int32.shift_right_logical raw 9)
                in
                let entry : import =
                  { lib; name = extract_symbol_name name_offset }
                in
                entry :: parse_imports rest
            | {|_|} -> []
          in
          (* Follows one chain of 64-bit pointers starting at the head of
             [bytes]. Bit 63 selects bind vs rebase; bits 51..62 hold the
             distance to the next pointer in 4-byte units, 0 ending the
             chain. *)
          let rec chain_walk bytes =
            match%bitstring bytes with
            | {| value : 64 : littleendian; _ : -1 : bitstring |} ->
                let is_bind =
                  Int64.equal (Int64.shift_right_logical value 63) 1L
                in
                let next =
                  Int64.to_int
                    (Int64.shift_right_logical
                       (Int64.shift_left value 1)
                       (64 - 12))
                in
                let fixup =
                  if is_bind then
                    Bind
                      {
                        ordinal = Int64.logand value 0xFFFFFFL;
                        addend =
                          Int64.logand (Int64.shift_right value 24) 0xFFL;
                      }
                  else
                    Rebase
                      {
                        target = Int64.logand value 0xFFFFFFFFFL;
                        high8 =
                          Int64.logand (Int64.shift_right value 36) 0xFFL;
                      }
                in
                let stride_bits = next * 4 * 8 in
                if next = 0 || stride_bits > Bitstring.bitstring_length bytes
                then [ fixup ]
                else fixup :: chain_walk (Bitstring.dropbits stride_bits bytes)
            | {|_|} -> []
          in
          (* Page-start value meaning that the page holds no fixups. *)
          let no_fixups_on_page = 0xFFFF in
          let walk_chain (chain : chain) =
            Printf.printf "segment: %x\n" (Int64.to_int chain.segment_offset);
            let segment =
              Bitstring.dropbits (Int64.to_int chain.segment_offset * 8) input
            in
            let page_size = Int32.to_int chain.page_size in
            (* A page start is relative to its own page, so the page index
               has to be folded into the offset. *)
            let walk_page index page =
              let page_start = Int32.to_int page in
              let offset_bits = ((index * page_size) + page_start) * 8 in
              if
                page_start = no_fixups_on_page
                || offset_bits > Bitstring.bitstring_length segment
              then []
              else chain_walk (Bitstring.dropbits offset_bits segment)
            in
            List.concat (List.mapi walk_page chain.pages)
          in
          let fixups = List.concat (List.map walk_chain chains) in
          let imports = parse_imports imports_bytes in
          let dylibs =
            List.filter_map
              (function
                | LoadDylib { name } -> Some name
                | Command _ | Segment _ | LinkEdit _ -> None)
              macho.commands
          in
          Printf.printf "import len %x\n" (List.length imports);
          Printf.printf "should be import len %x\n" (Int32.to_int imports_count);
          (* Arrays give constant-time lookup by ordinal. *)
          let import_table = Array.of_list imports in
          let dylib_table = Array.of_list dylibs in
          let library_of_ordinal = function
            | 0 | 0xfd -> "@self"
            | ordinal ->
                if ordinal - 1 < Array.length dylib_table then
                  dylib_table.(ordinal - 1)
                else "?"
          in
          List.filter_map
            (function
              | Rebase _ -> None
              | Bind { ordinal; _ } ->
                  let index = Int64.to_int ordinal in
                  if index < Array.length import_table then
                    let ({ lib; name } : import) = import_table.(index) in
                    let lib = library_of_ordinal (Int32.to_int lib) in
                    Some { typ = "bind"; name; lib }
                  else Some { typ = "bind?"; name = "?"; lib = "?" })
            fixups
      | {|_|} -> [])
|
|
end
|
|
|
|
open Macho;;
|
|
|
|
(* Program entry: load a file, decode its header and load commands, print a
   summary, then resolve the chained fixups.

   The input path is the first command-line argument when one is given and
   the original hard-coded sample otherwise.

   Fixes over the previous version: the 64-bit arm now records the magic it
   actually matched, and a file with neither magic fails with an explicit
   message instead of an anonymous Match_failure. *)
let () =
  Printf.printf "\n";
  let path =
    if Array.length Sys.argv > 1 then Sys.argv.(1) else "./research/cases/b"
  in
  let input = Bitstring.bitstring_of_file path in
  let header = Bitstring.takebits (32 * 7) input in
  (* The load commands start right after the header, which is
     [header_words] 32-bit words long, and span [sizeofcmds] bytes. *)
  let commands_after ~header_words sizeofcmds =
    let commands_size = Int32.to_int sizeofcmds * 8 in
    let commands_bytes =
      Bitstring.takebits commands_size
        (Bitstring.dropbits (32 * header_words) input)
    in
    Macho.parse_commands commands_bytes
  in
  let macho : Macho.t =
    match%bitstring header with
    | {| 0xFEEDFACEl : 32 : littleendian;
         cputype : 32 : littleendian;
         cpusubtype : 32 : littleendian;
         filetype : 32 : littleendian;
         ncmds : 32 : littleendian;
         sizeofcmds : 32 : littleendian;
         flags : 32 : littleendian
      |} ->
        let commands = commands_after ~header_words:7 sizeofcmds in
        {
          magic = 0xFEEDFACEl;
          cputype;
          cpusubtype;
          filetype;
          ncmds;
          sizeofcmds;
          flags;
          commands;
        }
    | {| 0xFEEDFACFl : 32 : littleendian;
         cputype : 32 : littleendian;
         cpusubtype : 32 : littleendian;
         filetype : 32 : littleendian;
         ncmds : 32 : littleendian;
         sizeofcmds : 32 : littleendian;
         flags : 32 : littleendian
      |} ->
        (* The 64-bit header carries one extra word after [flags]. *)
        let commands = commands_after ~header_words:8 sizeofcmds in
        {
          magic = 0xFEEDFACFl;
          cputype;
          cpusubtype;
          filetype;
          ncmds;
          sizeofcmds;
          flags;
          commands;
        }
    | {|_|} -> failwith "Input file is not a Macho"
  in
  Macho.summary_print macho;
  let _imports = Macho.parse_fixups macho input in
  (* Kept from the original, still disabled:
     let () = Printf.printf "Symbols\n" in
     List.iter (fun symbol -> Printf.printf " Type=%s; Name=%s; Lib=%s\n" symbol.typ symbol.name symbol.lib) imports *)
  ()
|