macho/macho-ocaml/bin/main.ml
2023-05-31 16:17:32 +07:00

486 lines
18 KiB
OCaml

module Macho = struct
(* type u32 = Unsigned.UInt32.t *)
(* Fixed-width integer aliases used by the record types of this module.
   Note that [u16] is an alias for [int32], not a distinct 16-bit type:
   16-bit values read from the file are widened with [Int32.of_int] before
   being stored. The aliases are signed OCaml integers, so callers mask the
   result of [Int32.to_int] when they want an unsigned reading. *)
type u16 = int32
type u32 = int32
type u64 = int64
(* One section entry as built by [parse_sections]. Every field holds the
   value read from the entry under the same name: [name] and [segment] are
   read as 16-byte strings, [addr] and [size] as 64-bit little-endian
   integers, and the remaining fields as 32-bit little-endian integers. *)
type section = {
name : string;
segment : string;
addr : u64;
size : u64;
offset : u32;
align : u32;
reloff : u32;
nreloc : u32;
flags : u32;
}
(* A decoded load command, as produced by [parse_commands]. *)
type command =
(* Fallback for commands that get no dedicated decoding: only the command
   id and the size declared by the command are kept. *)
| Command of { cmd : u32; size : u32 }
(* A segment command together with the sections that follow it
   (decoded by [parse_sections]). *)
| Segment of {
name : string;
vmaddr : u64;
vmsize : u64;
fileoff : u64;
filesize : u64;
sections : section list;
flags : u32;
}
(* A command whose payload is a (data offset, data size) pair; [cmd] keeps
   the original command id so callers can tell the variants apart. *)
| LinkEdit of { cmd : u32; dataoff : u32; datasize : u32 }
(* A dylib-loading command; [name] is the string carried by the command. *)
| LoadDylib of { name: string }
(* | SymbolTable of {}
| DynamicSymbolTable of {}
| Linker of {}
| UUID of {}
| BuildVersion of {}
| SourceVersion of {}
| FunctionStart of {}
| DataInCode of {}
| Main of {}
| Encryption of {} *)
(* One entry of the imports table decoded inside [parse_fixups]: [lib] is
   the library ordinal read from the entry and [name] is the symbol name
   looked up in the symbol strings. *)
type import = { lib : u32; name : string }
(* Per-segment chain description decoded inside [parse_fixups]. The scalar
   fields hold the header values of the same name ([page_size] and
   [pointer_format] are 16-bit values widened to [int32]); [pages] holds the
   16-bit page entries that follow the header, in file order. *)
type chain = {
size: u32;
page_size: u16;
pointer_format: u16;
segment_offset: u64;
max_valid_pointer: u32;
pages: u16 list
}
(* Dump a [chain] on stdout: its page size, pointer format and segment
   offset in hexadecimal, followed by the list of page entries. The [size]
   and [max_valid_pointer] fields are not printed. *)
let print_chain chain =
  let { page_size; pointer_format; segment_offset; pages; _ } = chain in
  (* Mask after the conversion so the value is shown as an unsigned word. *)
  let low32 word = Int32.to_int word land 0xffffffff in
  let show_page entry = Printf.printf "0x%x," (Int32.to_int entry land 0xffff) in
  print_string "chain\n";
  Printf.printf " page_size 0x%x\n" (low32 page_size);
  Printf.printf " pointer format 0x%x\n" (low32 pointer_format);
  Printf.printf " segment 0x%x\n" (Int64.to_int segment_offset);
  print_string " pages: [";
  List.iter show_page pages;
  print_string "]\n"
(* A single decoded chain entry, produced by the chain walk in
   [parse_fixups] from one 64-bit little-endian value:
   - [Bind]: [ordinal] is the low 24 bits, [addend] the following 8 bits;
   - [Rebase]: [target] is the low 36 bits, [high8] the following 8 bits. *)
type fixup =
| Bind of { ordinal: u64; addend: u64 }
| Rebase of { target: u64; high8: u64 }
(* Print a one-line description of a fixup on stdout.

   Both variants are printed: the [Rebase] arm used to print an empty
   string, which made rebases invisible when dumping a chain. Values are
   formatted with [%Lx] so the full 64-bit value is shown instead of going
   through [Int64.to_int], which drops the top bit on 64-bit hosts. *)
let print_fixup = function
  | Bind { ordinal; addend } ->
    Printf.printf "Bind(ordinal:%Lx, %Lx)\n" ordinal addend
  | Rebase { target; high8 } ->
    Printf.printf "Rebase(target:%Lx, high8:%Lx)\n" target high8
(* Result row returned by [parse_fixups]: [typ] is a short textual tag
   (for example "bind"), [name] the symbol name and [lib] the name of the
   library it is resolved against ("?" is used when a value is unknown). *)
type symbol = {
typ: string;
name: string;
lib: string;
}
(* A parsed file: the seven 32-bit header words read by the top-level
   script ([magic] through [flags]) plus the load commands decoded by
   [parse_commands]. *)
type t = {
magic : u32;
cputype : u32;
cpusubtype : u32;
filetype : u32;
ncmds : u32;
sizeofcmds : u32;
flags : u32;
commands : command list;
}
(* Load-command identifiers, compared against the [cmd] word of each load
   command. Identifiers whose top bit is set are written as an explicit
   [Int32.logor] with 0x80000000 (LC_REQ_DYLD in Apple's loader.h) so the
   base number stays readable; the resulting [int32] values are negative. *)
let lc_segment = 0x01l
let lc_symtab = 0x02l
let lc_symseg = 0x03l
let lc_thread = 0x04l
let lc_unixthread = 0x05l
let lc_loadfvmlib = 0x06l
let lc_idfvmlib = 0x07l
let lc_ident = 0x08l
let lc_fvmfile = 0x09l
let lc_prepage = 0x0Al
let lc_dysymtab = 0x0Bl
let lc_load_dylib = 0x0Cl
let lc_id_dylib = 0x0Dl
let lc_load_dylinker = 0x0El
let lc_id_dylinker = 0x0Fl
let lc_prebound_dylib = 0x10l
let lc_routines = 0x11l
let lc_sub_framework = 0x12l
let lc_sub_umbrella = 0x13l
let lc_sub_client = 0x14l
let lc_sub_library = 0x15l
let lc_twolevel_hints = 0x16l
let lc_prebind_cksum = 0x17l
let lc_load_weak_dylib = Int32.logor 0x80000000l 0x18l
let lc_segment_64 = 0x19l
let lc_routines_64 = 0x1Al
let lc_uuid = 0x1Bl
let lc_rpath = Int32.logor 0x80000000l 0x1Cl
let lc_code_signature = 0x1Dl
let lc_segment_split_info = 0x1El
let lc_reexport_dylib = Int32.logor 0x80000000l 0x1Fl
let lc_lazy_load_dylib = 0x20l
let lc_encryption_info = 0x21l
let lc_dyld_info = 0x22l
let lc_dyld_info_only = Int32.logor 0x80000000l 0x22l
let lc_load_upward_dylib = Int32.logor 0x80000000l 0x23l
let lc_version_min_macosx = 0x24l
let lc_version_min_iphoneos = 0x25l
let lc_function_starts = 0x26l
let lc_dyld_environment = 0x27l
let lc_main = Int32.logor 0x80000000l 0x28l
let lc_data_in_code = 0x29l
let lc_source_version = 0x2Al
let lc_dylib_code_sign_drs = 0x2Bl
let lc_encryption_info_64 = 0x2Cl
let lc_linker_option = 0x2Dl
let lc_linker_optimization_hint = 0x2El
let lc_version_min_tvos = 0x2Fl
let lc_version_min_watchos = 0x30l
let lc_note = 0x31l
let lc_build_version = 0x32l
let lc_dyld_exports_trie = Int32.logor 0x80000000l 0x33l
let lc_dyld_chained_fixups = Int32.logor 0x80000000l 0x34l
let lc_fileset_entry = Int32.logor 0x80000000l 0x35l
(* [parse_sections bytes] decodes consecutive 80-byte section entries from
   [bytes] and returns them in file order. Decoding stops at the first
   position where a complete entry no longer fits; trailing bytes are
   ignored.

   [name] and [segment] are fixed 16-byte fields padded with NUL bytes; the
   padding is removed here so the strings can be printed and compared
   directly (a name that fills all 16 bytes has no NUL and is kept whole).
   The three trailing 32-bit reserved words are skipped. *)
let parse_sections bytes =
  (* Keep everything before the first NUL, or the whole string if none. *)
  let strip_padding raw =
    match String.index_opt raw '\x00' with
    | Some stop -> String.sub raw 0 stop
    | None -> raw
  in
  (* Accumulate in reverse so the walk is tail-recursive. *)
  let rec collect bits acc =
    match%bitstring bits with
    | {| name : 16*8 : string; segment : 16*8 : string;
         addr : 64 : littleendian; size : 64 : littleendian;
         offset : 32 : littleendian; align : 32 : littleendian;
         reloff : 32 : littleendian; nreloc : 32 : littleendian;
         flags : 32 : littleendian;
         _reserved : 32*3 : bitstring;
         rest : -1 : bitstring
      |}
      ->
      let entry : section =
        {
          name = strip_padding name;
          segment = strip_padding segment;
          addr;
          size;
          offset;
          align;
          reloff;
          nreloc;
          flags;
        }
      in
      collect rest (entry :: acc)
    | {| _ |} -> List.rev acc
  in
  collect bytes []
(* [parse_commands input] decodes the load commands stored back to back in
   [input] and returns them in file order. Decoding stops at the first
   position where no command header fits.

   - [lc_segment_64] becomes [Segment], with its sections decoded by
     [parse_sections]; the 16-byte segment name is stripped of NUL padding.
   - Commands carrying a (dataoff, datasize) pair become [LinkEdit].
   - Every dylib-loading command (regular, weak, re-export, upward) becomes
     [LoadDylib]. All of them must be listed, in file order, because
     library ordinals used by imports count every one of these commands,
     not only [lc_load_dylib].
   - Anything else becomes [Command] and is skipped using its own size.

   The dylib name is located through the command's name offset (24 when the
   string directly follows the fixed fields) and read up to the first NUL;
   a name without a terminating NUL is kept whole instead of raising
   [Not_found]. *)
let parse_commands input =
  let linkedit_cmds =
    [
      lc_dyld_exports_trie;
      lc_dyld_chained_fixups;
      lc_code_signature;
      lc_segment_split_info;
      lc_function_starts;
      lc_data_in_code;
      lc_dylib_code_sign_drs;
      lc_linker_optimization_hint;
    ]
  in
  let dylib_cmds =
    [
      lc_load_dylib;
      lc_load_weak_dylib;
      lc_reexport_dylib;
      lc_load_upward_dylib;
    ]
  in
  (* Keep everything before the first NUL, or the whole string if none. *)
  let c_string raw =
    match String.index_opt raw '\x00' with
    | Some stop -> String.sub raw 0 stop
    | None -> raw
  in
  (* Size in bytes of the fixed part of a dylib command (six 32-bit words). *)
  let dylib_fixed_bytes = 24 in
  let rec collect bits acc =
    match%bitstring bits with
    | {| cmd : 32 : littleendian; _cmdsize : 32 : littleendian;
         name : 16*8 : string;
         vmaddr : 64 : littleendian; vmsize : 64 : littleendian;
         fileoff : 64 : littleendian; filesize : 64 : littleendian;
         _permission : 64 : littleendian;
         nsect : 32 : littleendian; flags : 32 : littleendian;
         sections_bytes : ((Int32.to_int nsect) * 10 * 64) : bitstring;
         rest : -1 : bitstring
      |}
      when cmd = lc_segment_64 ->
      let sections = parse_sections sections_bytes in
      let segment =
        Segment
          {
            name = c_string name;
            vmaddr;
            vmsize;
            fileoff;
            filesize;
            sections;
            flags;
          }
      in
      collect rest (segment :: acc)
    | {| cmd : 32 : littleendian; _cmdsize : 32 : littleendian;
         dataoff : 32 : littleendian; datasize : 32 : littleendian;
         rest : -1 : bitstring
      |}
      when List.mem cmd linkedit_cmds ->
      collect rest (LinkEdit { cmd; dataoff; datasize } :: acc)
    | {| cmd : 32 : littleendian; cmdsize : 32 : littleendian;
         name_offset : 32 : littleendian; _timestamp : 32 : littleendian;
         _current_version : 32 : littleendian;
         _compatibility_version : 32 : littleendian;
         payload : ((Int32.to_int cmdsize)*8 - 32*6) : bitstring;
         rest : -1 : bitstring
      |}
      when List.mem cmd dylib_cmds ->
      let payload = Bitstring.string_of_bitstring payload in
      (* The name offset counts from the start of the command; fall back to
         the start of the payload when it points outside of it. *)
      let start = Int32.to_int name_offset - dylib_fixed_bytes in
      let start =
        if start < 0 || start > String.length payload then 0 else start
      in
      let tail = String.sub payload start (String.length payload - start) in
      collect rest (LoadDylib { name = c_string tail } :: acc)
    | {| cmd : 32 : littleendian;
         cmdsize : 32 : littleendian;
         _data : ((Int32.to_int cmdsize)*8 - 64) : bitstring;
         rest : -1 : bitstring
      |}
      ->
      collect rest (Command { cmd; size = cmdsize } :: acc)
    | {| _ |} -> List.rev acc
  in
  collect input []
(* Print a human-readable outline of [macho.commands] on stdout, one entry
   per load command in list order: dylib names, segments with their section
   names, link-edit commands with their data offset and size, and the bare
   command id for everything else. *)
let summary_print macho =
  (* Mask after the conversion so ids with the top bit set print unsigned. *)
  let hex32 word = Int32.to_int word land 0xffffffff in
  let describe = function
    | LoadDylib { name } -> Printf.printf "Load Dylib %s\n" name
    | Segment { name; sections; _ } ->
      Printf.printf "Segment %s\n" name;
      List.iter
        (fun (sec : section) -> Printf.printf " Section %s\n" sec.name)
        sections
    | LinkEdit { cmd; dataoff; datasize } ->
      Printf.printf "LinkEdit 0x%x\n" (hex32 cmd);
      Printf.printf " dataoff 0x%x\n" (hex32 dataoff);
      Printf.printf " datasize 0x%x\n" (hex32 datasize)
    | Command { cmd; _ } -> Printf.printf "Command 0x%x\n" (hex32 cmd)
  in
  List.iter describe macho.commands
(* [parse_fixups macho input] resolves the chained-fixup binds of a file.

   [macho] supplies the decoded load commands and [input] is the whole file.
   The result has one [symbol] per bind found while walking the fixup
   chains, in walk order; rebases are walked but not reported. An empty
   list is returned when the file does not carry exactly one
   [lc_dyld_chained_fixups] command or when its header cannot be read
   (this used to raise [Match_failure]).

   Decoding notes:
   - Each import is one little-endian 32-bit word: bits 0-7 are the library
     ordinal, bit 8 the weak flag, bits 9-31 the offset of the name in the
     symbol strings. The word is read whole and split with shifts, because
     the fields are allocated from the least significant bit.
   - A chain for page [i] starts at
     [segment_offset + i * page_size + page_start.(i)]; entries equal to
     0xFFFF mean "no fixups on this page" and are skipped.
   - Inside a chain, the 12-bit [next] field is the distance to the next
     entry in 4-byte units; 0 ends the chain.
   - Out-of-range offsets yield empty data or a "?" placeholder instead of
     raising.

   NOTE(review): pointers are always decoded with the 64-bit generic layout
   and imports with the 32-bit layout; [pointer_format] and the imports
   format word are not consulted, and [segment_offset] is used as a file
   offset. Confirm this is enough for the inputs you care about.

   The hexdump and the "segment:" / "import len" lines written to stdout
   are diagnostics kept from the original implementation. *)
let parse_fixups macho input =
  let u32_to_int word = Int32.to_int word land 0xffffffff in
  (* [drop_bytes n bits] skips [n] bytes, yielding an empty bitstring when
     [n] is negative or past the end instead of raising. *)
  let drop_bytes count bits =
    let skipped = count * 8 in
    if count < 0 || skipped > Bitstring.bitstring_length bits then
      Bitstring.empty_bitstring
    else Bitstring.dropbits skipped bits
  in
  (* [take_bytes n bits] keeps at most [n] bytes, clamped to what exists. *)
  let take_bytes count bits =
    let wanted = count * 8 in
    if wanted <= 0 then Bitstring.empty_bitstring
    else if wanted >= Bitstring.bitstring_length bits then bits
    else Bitstring.takebits wanted bits
  in
  (* Read a packed table of little-endian 32-bit words. *)
  let rec read_u32s bits acc =
    match%bitstring bits with
    | {| word : 32 : littleendian; rest : -1 : bitstring |} ->
      read_u32s rest (u32_to_int word :: acc)
    | {| _ |} -> List.rev acc
  in
  (* Read a packed table of little-endian 16-bit words, widened to [u16]. *)
  let rec read_u16s bits acc =
    match%bitstring bits with
    | {| half : 16 : littleendian; rest : -1 : bitstring |} ->
      read_u16s rest (Int32.of_int half :: acc)
    | {| _ |} -> List.rev acc
  in
  (* Decode the per-segment header found at the start of [bits]. *)
  let parse_segment_starts bits =
    let () = Bitstring.hexdump_bitstring stdout bits in
    match%bitstring bits with
    | {| size : 32 : littleendian;
         page_size : 16 : littleendian;
         pointer_format : 16 : littleendian;
         segment_offset : 64 : littleendian;
         max_valid_pointer : 32 : littleendian;
         page_count : 16 : littleendian;
         page_start : 16 * page_count : bitstring;
         _ : -1 : bitstring
      |}
      ->
      let entry : chain =
        {
          size;
          page_size = Int32.of_int page_size;
          pointer_format = Int32.of_int pointer_format;
          segment_offset;
          max_valid_pointer;
          pages = read_u16s page_start [];
        }
      in
      Some entry
    | {| _ |} -> None
  in
  (* Decode the table of per-segment offsets and each segment it points to.
     Offsets are relative to the start of [starts]; 0 means "no fixups". *)
  let parse_starts starts =
    match%bitstring starts with
    | {| count : 32 : littleendian;
         offsets : (Int32.to_int count) * 32 : bitstring;
         _ : -1 : bitstring
      |}
      ->
      read_u32s offsets []
      |> List.filter (fun off -> off <> 0)
      |> List.filter_map (fun off ->
             parse_segment_starts (drop_bytes off starts))
    | {| _ |} -> []
  in
  (* Split one 64-bit chain entry into the fixup it describes and the
     distance (in 4-byte units) to the next entry. *)
  let decode_pointer value =
    let field shift mask =
      Int64.logand (Int64.shift_right_logical value shift) mask
    in
    let next = Int64.to_int (field 51 0xFFFL) in
    let fixup =
      if field 63 1L = 1L then
        Bind { ordinal = field 0 0xFFFFFFL; addend = field 24 0xFFL }
      else Rebase { target = field 0 0xFFFFFFFFFL; high8 = field 36 0xFFL }
    in
    (fixup, next)
  in
  (* Follow one chain inside [segment], starting [first_byte] bytes in. *)
  let walk_chain segment first_byte =
    let rec follow at acc =
      let cursor = drop_bytes at segment in
      match%bitstring cursor with
      | {| value : 64 : littleendian; _ : -1 : bitstring |} ->
        let fixup, next = decode_pointer value in
        let acc = fixup :: acc in
        if next = 0 then List.rev acc else follow (at + (next * 4)) acc
      | {| _ |} -> List.rev acc
    in
    follow first_byte []
  in
  let fixups_cmds =
    List.filter_map
      (function
        | LinkEdit { cmd; dataoff; datasize }
          when cmd = lc_dyld_chained_fixups ->
          Some (dataoff, datasize)
        | _ -> None)
      macho.commands
  in
  match fixups_cmds with
  | [ (dataoff, datasize) ] -> begin
      let blob =
        input
        |> drop_bytes (u32_to_int dataoff)
        |> take_bytes (u32_to_int datasize)
      in
      match%bitstring blob with
      | {| _version : 32 : littleendian;
           offset : 32 : littleendian;
           imports_offset : 32 : littleendian;
           symbols_offset : 32 : littleendian;
           imports_count : 32 : littleendian;
           _imports_format : 32 : littleendian;
           _symbols_format : 32 : littleendian
        |}
        ->
        let starts_offset = u32_to_int offset in
        let imports_start = u32_to_int imports_offset in
        let symbol_pool =
          Bitstring.string_of_bitstring
            (drop_bytes (u32_to_int symbols_offset) blob)
        in
        (* NUL-terminated name starting at [start] in the symbol strings. *)
        let symbol_name_at start =
          if start < 0 || start >= String.length symbol_pool then "?"
          else
            let stop =
              match String.index_from_opt symbol_pool start '\x00' with
              | Some stop -> stop
              | None -> String.length symbol_pool
            in
            String.sub symbol_pool start (stop - start)
        in
        let rec parse_imports bits acc =
          match%bitstring bits with
          | {| raw : 32 : littleendian; rest : -1 : bitstring |} ->
            let lib = Int32.logand raw 0xFFl in
            let name_offset = Int32.to_int (Int32.shift_right_logical raw 9) in
            let entry : import = { lib; name = symbol_name_at name_offset } in
            parse_imports rest (entry :: acc)
          | {| _ |} -> List.rev acc
        in
        (* The starts table is taken to end where the imports table begins;
           if the tables are laid out differently, read to the end. *)
        let starts_bits = drop_bytes starts_offset blob in
        let starts_len = imports_start - starts_offset in
        let starts_bits =
          if starts_len > 0 then take_bytes starts_len starts_bits
          else starts_bits
        in
        let chains = parse_starts starts_bits in
        (* we have chains and imports table *)
        let fixups =
          List.concat
            (List.map
               (fun (c : chain) ->
                 Printf.printf "segment: %x\n" (Int64.to_int c.segment_offset);
                 let segment =
                   drop_bytes (Int64.to_int c.segment_offset) input
                 in
                 let page_size = Int32.to_int c.page_size in
                 c.pages
                 |> List.mapi (fun index start ->
                        let start = Int32.to_int start in
                        (* 0xFFFF marks a page without any fixup. *)
                        if start = 0xFFFF then []
                        else walk_chain segment ((index * page_size) + start))
                 |> List.concat)
               chains)
        in
        let imports =
          parse_imports
            (take_bytes (u32_to_int imports_count * 4)
               (drop_bytes imports_start blob))
            []
        in
        let dylibs =
          List.filter_map
            (function LoadDylib { name } -> Some name | _ -> None)
            macho.commands
        in
        (* List.iter (print_fixup) fixups; *)
        Printf.printf "import len %x\n" (List.length imports);
        Printf.printf "should be import len %x\n" (Int32.to_int imports_count);
        List.filter_map
          (function
            | Rebase _ -> None
            | Bind { ordinal; _ } ->
              let resolved : symbol =
                match List.nth_opt imports (Int64.to_int ordinal) with
                | None -> { typ = "bind?"; name = "?"; lib = "?" }
                | Some (imp : import) ->
                  let lib =
                    match Int32.to_int imp.lib with
                    | 0 | 0xfd -> "@self"
                    | ordinal -> (
                      (* Ordinals are 1-based indexes into the dylib list. *)
                      match List.nth_opt dylibs (ordinal - 1) with
                      | Some dylib -> dylib
                      | None -> "?")
                  in
                  { typ = "bind"; name = imp.name; lib }
              in
              Some resolved)
          fixups
      | {| _ |} -> []
    end
  | _ -> []
end
open Macho;;
(* Entry point: read a Mach-O file, print a summary of its load commands
   and resolve its chained-fixup binds.

   The file to read is the first command-line argument when one is given,
   and "./research/cases/b" otherwise. Both the 32-bit (0xFEEDFACE) and the
   64-bit (0xFEEDFACF) little-endian headers are accepted; the 64-bit
   header carries one extra 32-bit word before the load commands. The magic
   actually read is stored in the result (the 64-bit case used to record
   the 32-bit magic). Any other input is reported on stderr and the program
   exits with status 1 instead of raising [Match_failure]. *)
let () =
  let () = Printf.printf "\n" in
  let path =
    if Array.length Sys.argv > 1 then Sys.argv.(1) else "./research/cases/b"
  in
  let input = Bitstring.bitstring_of_file path in
  let macho : Macho.t =
    match%bitstring input with
    | {| magic : 32 : littleendian;
         cputype : 32 : littleendian;
         cpusubtype : 32 : littleendian;
         filetype : 32 : littleendian;
         ncmds : 32 : littleendian;
         sizeofcmds : 32 : littleendian;
         flags : 32 : littleendian
      |}
      when magic = 0xFEEDFACEl || magic = 0xFEEDFACFl ->
      (* Seven header words for 32-bit files, eight for 64-bit files. *)
      let header_words = if magic = 0xFEEDFACFl then 8 else 7 in
      let after_header = Bitstring.dropbits (32 * header_words) input in
      (* Never ask for more command bytes than the file actually holds. *)
      let commands_size =
        min (Int32.to_int sizeofcmds * 8)
          (Bitstring.bitstring_length after_header)
      in
      let commands_bytes = Bitstring.takebits commands_size after_header in
      (* let () = Bitstring.hexdump_bitstring stdout commands_bytes in *)
      let commands = Macho.parse_commands commands_bytes in
      {
        magic;
        cputype;
        cpusubtype;
        filetype;
        ncmds;
        sizeofcmds;
        flags;
        commands;
      }
    | {| _ |} ->
      prerr_endline (path ^ ": not a little-endian Mach-O file");
      exit 1
  in
  let () = Macho.summary_print macho in
  let _symbols : Macho.symbol list = Macho.parse_fixups macho input in
  (* let () = Printf.printf "Symbols\n" in
  List.iter (fun symbol -> Printf.printf " Type=%s; Name=%s; Lib=%s\n" symbol.typ symbol.name symbol.lib) imports *)
  ()