From 8b962dd88a046ea2a0a60acd51e8a15e3346f6b9 Mon Sep 17 00:00:00 2001 From: nganhkhoa Date: Wed, 31 May 2023 16:17:32 +0700 Subject: [PATCH] add fun ocaml project --- macho-ocaml/.gitignore | 33 +++ macho-ocaml/.ocamlformat | 1 + macho-ocaml/bin/dune | 11 + macho-ocaml/bin/main.ml | 485 ++++++++++++++++++++++++++++++++++++++ macho-ocaml/dune-project | 28 +++ macho-ocaml/lib/dune | 2 + macho-ocaml/macho.opam | 32 +++ macho-ocaml/test/dune | 2 + macho-ocaml/test/macho.ml | 0 9 files changed, 594 insertions(+) create mode 100644 macho-ocaml/.gitignore create mode 100644 macho-ocaml/.ocamlformat create mode 100644 macho-ocaml/bin/dune create mode 100644 macho-ocaml/bin/main.ml create mode 100644 macho-ocaml/dune-project create mode 100644 macho-ocaml/lib/dune create mode 100644 macho-ocaml/macho.opam create mode 100644 macho-ocaml/test/dune create mode 100644 macho-ocaml/test/macho.ml diff --git a/macho-ocaml/.gitignore b/macho-ocaml/.gitignore new file mode 100644 index 0000000..d9c075f --- /dev/null +++ b/macho-ocaml/.gitignore @@ -0,0 +1,33 @@ +# Created by https://www.toptal.com/developers/gitignore/api/ocaml +# Edit at https://www.toptal.com/developers/gitignore?templates=ocaml + +### OCaml ### +*.annot +*.cmo +*.cma +*.cmi +*.a +*.o +*.cmx +*.cmxs +*.cmxa + +# ocamlbuild working directory +_build/ + +# ocamlbuild targets +*.byte +*.native + +# oasis generated files +setup.data +setup.log + +# Merlin configuring file for Vim and Emacs +.merlin + +# Dune generated files +*.install + +# Local OPAM switch +_opam/ diff --git a/macho-ocaml/.ocamlformat b/macho-ocaml/.ocamlformat new file mode 100644 index 0000000..930ad89 --- /dev/null +++ b/macho-ocaml/.ocamlformat @@ -0,0 +1 @@ +version = 0.25.1 diff --git a/macho-ocaml/bin/dune b/macho-ocaml/bin/dune new file mode 100644 index 0000000..2914210 --- /dev/null +++ b/macho-ocaml/bin/dune @@ -0,0 +1,11 @@ +(executable + (public_name macho) + (name main) + (libraries macho bitstring ppx_bitstring integers) + (preprocess + (pps ppx_bitstring))) + +(env + (dev + (flags + (:standard -warn-error -A)))) diff --git a/macho-ocaml/bin/main.ml b/macho-ocaml/bin/main.ml new file mode 100644 index 0000000..fc63003 --- /dev/null +++ b/macho-ocaml/bin/main.ml @@ -0,0 +1,485 @@ +module Macho = struct + (* type u32 = Unsigned.UInt32.t *) + type u16 = int32 + type u32 = int32 + type u64 = int64 + + type section = { + name : string; + segment : string; + addr : u64; + size : u64; + offset : u32; + align : u32; + reloff : u32; + nreloc : u32; + flags : u32; + } + + type command = + | Command of { cmd : u32; size : u32 } + | Segment of { + name : string; + vmaddr : u64; + vmsize : u64; + fileoff : u64; + filesize : u64; + sections : section list; + flags : u32; + } + | LinkEdit of { cmd : u32; dataoff : u32; datasize : u32 } + | LoadDylib of { name: string } + (* | SymbolTable of {} + | DynamicSymbolTable of {} + | Linker of {} + | UUID of {} + | BuildVersion of {} + | SourceVersion of {} + | FunctionStart of {} + | DataInCode of {} + | Main of {} + | Encryption of {} *) + + type import = { lib : u32; name : string } + type chain = { + size: u32; + page_size: u16; + pointer_format: u16; + segment_offset: u64; + max_valid_pointer: u32; + pages: u16 list + } + + let print_chain chain = + Printf.printf "chain\n"; + Printf.printf " page_size 0x%x\n" (Int32.to_int chain.page_size land 0xffffffff); + Printf.printf " pointer format 0x%x\n" (Int32.to_int chain.pointer_format land 0xffffffff); + Printf.printf " segment 0x%x\n" (Int64.to_int chain.segment_offset); + Printf.printf " pages: ["; + List.iter (fun page -> Printf.printf "0x%x," (Int32.to_int page land 0xffff)) chain.pages; + Printf.printf "]\n"; + + type fixup = + | Bind of { ordinal: u64; addend: u64 } + | Rebase of { target: u64; high8: u64 } + + let print_fixup = function + | Bind bind -> Printf.printf "Bind(ordinal:%x, %x)\n" (Int64.to_int bind.ordinal) (Int64.to_int bind.addend) + | Rebase rebase -> Printf.printf "" + + type symbol = { + typ: string; + name: string; + lib: string; + } + + type t = { + magic : u32; + cputype : u32; + cpusubtype : u32; + filetype : u32; + ncmds : u32; + sizeofcmds : u32; + flags : u32; + commands : command list; + } + + let lc_segment = 0x00000001l + let lc_symtab = 0x00000002l + let lc_symseg = 0x00000003l + let lc_thread = 0x00000004l + let lc_unixthread = 0x00000005l + let lc_loadfvmlib = 0x00000006l + let lc_idfvmlib = 0x00000007l + let lc_ident = 0x00000008l + let lc_fvmfile = 0x00000009l + let lc_prepage = 0x0000000al + let lc_dysymtab = 0x0000000bl + let lc_load_dylib = 0x0000000cl + let lc_id_dylib = 0x0000000dl + let lc_load_dylinker = 0x0000000el + let lc_id_dylinker = 0x0000000fl + let lc_prebound_dylib = 0x00000010l + let lc_routines = 0x00000011l + let lc_sub_framework = 0x00000012l + let lc_sub_umbrella = 0x00000013l + let lc_sub_client = 0x00000014l + let lc_sub_library = 0x00000015l + let lc_twolevel_hints = 0x00000016l + let lc_prebind_cksum = 0x00000017l + let lc_load_weak_dylib = 0x80000018l + let lc_segment_64 = 0x00000019l + let lc_routines_64 = 0x0000001al + let lc_uuid = 0x0000001bl + let lc_rpath = 0x8000001cl + let lc_code_signature = 0x0000001dl + let lc_segment_split_info = 0x0000001el + let lc_reexport_dylib = 0x8000001fl + let lc_lazy_load_dylib = 0x00000020l + let lc_encryption_info = 0x00000021l + let lc_dyld_info = 0x00000022l + let lc_dyld_info_only = 0x80000022l + let lc_load_upward_dylib = 0x80000023l + let lc_version_min_macosx = 0x00000024l + let lc_version_min_iphoneos = 0x00000025l + let lc_function_starts = 0x00000026l + let lc_dyld_environment = 0x00000027l + let lc_main = 0x80000028l + let lc_data_in_code = 0x00000029l + let lc_source_version = 0x0000002al + let lc_dylib_code_sign_drs = 0x0000002bl + let lc_encryption_info_64 = 0x0000002cl + let lc_linker_option = 0x0000002dl + let lc_linker_optimization_hint = 0x0000002el + let lc_version_min_tvos = 0x0000002fl + let lc_version_min_watchos = 0x00000030l + let lc_note = 0x00000031l + let lc_build_version = 0x00000032l + let lc_dyld_exports_trie = 0x80000033l + let lc_dyld_chained_fixups = 0x80000034l + let lc_fileset_entry = 0x80000035l + + let rec parse_sections bytes = + let sections : section list = + match%bitstring bytes with + | {| name : 16*8 : string; segment : 16*8 : string; + addr: 64 : littleendian; size: 64 : littleendian; + offset: 32 : littleendian; align: 32 : littleendian; + reloff: 32 : littleendian; nreloc: 32 : littleendian; + flags: 32 : littleendian; + reserved: 32*3 : bitstring; + rest : -1 : bitstring + |} + -> + let this_section = + { name; segment; addr; size; offset; align; reloff; nreloc; flags } + in + this_section :: parse_sections rest + | {|_|} -> [] + in + sections + + let rec parse_commands input = + let isLinkEditCommand cmd = + let linkedit_cmds = + [ + lc_dyld_exports_trie; + lc_dyld_chained_fixups; + lc_code_signature; + lc_segment_split_info; + lc_function_starts; + lc_data_in_code; + lc_dylib_code_sign_drs; + lc_linker_optimization_hint; + ] + in + List.exists (fun x -> cmd = x) linkedit_cmds + in + (* let () = Printf.printf "%s" (Bitstring.string_of_bitstring input) in *) + let commands : command list = + match%bitstring input with + | {| cmd: 32 : littleendian; cmdsize: 32 : littleendian; + name : 16*8 : string; + vmaddr : 64 : littleendian; vmsize : 64 : littleendian; + fileoff : 64 : littleendian; filesize : 64 : littleendian; + permission : 64 : littleendian; + nsect: 32 : littleendian; flags: 32 : littleendian; + sections_bytes : ((Int32.to_int nsect) * 10 * 64) : bitstring; + rest : -1 : bitstring + |} + when cmd = lc_segment_64 -> + let sections = parse_sections sections_bytes in + let this_command = + Segment { name; vmaddr; vmsize; fileoff; filesize; sections; flags } + in + this_command :: parse_commands rest + | {| cmd: 32 : littleendian; cmdsize: 32 : littleendian; + dataoff : 32 : littleendian; datasize : 32 : littleendian; + rest : -1 : bitstring + |} + when isLinkEditCommand cmd -> + let this_command = LinkEdit { cmd; dataoff; datasize } in + this_command :: parse_commands rest + | {| cmd: 32 : littleendian; cmdsize: 32 : littleendian; + name_offset : 32 : littleendian; timestamp : 32 : littleendian; + current_version : 32 : littleendian; compatibility_version : 32 : littleendian; + name : ((Int32.to_int cmdsize)*8 - 32*6) : bitstring; + rest : -1 : bitstring + |} + when cmd = lc_load_dylib -> + let name = Bitstring.string_of_bitstring name in + let end_offset = String.index_from name 0 '\x00' in + let name = String.sub name 0 end_offset in + let this_command = LoadDylib { name } in + this_command :: parse_commands rest + | {| cmd: 32 : littleendian; + cmdsize: 32 : littleendian; + data : ((Int32.to_int cmdsize)*8 - 64) : bitstring; + rest : -1 : bitstring + |} + -> + (* let () = Printf.printf "parsing command %x\n" (Int32.to_int cmdsize) in *) + (* let () = Bitstring.hexdump_bitstring stdout data in *) + let this_command = Command { cmd; size = cmdsize } in + this_command :: parse_commands rest + | {|_|} -> [] + in + commands + + let summary_print macho = + List.iter + (fun cmd -> + match cmd with + | LoadDylib dylib -> + Printf.printf "Load Dylib %s\n" dylib.name; + | Segment segment -> + let () = Printf.printf "Segment %s\n" segment.name in + List.iter + (fun (sec : section) -> Printf.printf " Section %s\n" sec.name) + segment.sections + | LinkEdit linkedit -> + let () = + Printf.printf "LinkEdit 0x%x\n" + (Int32.to_int linkedit.cmd land 0xffffffff) + in + let () = + Printf.printf " dataoff 0x%x\n" + (Int32.to_int linkedit.dataoff land 0xffffffff) + in + ignore + (Printf.printf " datasize 0x%x\n" + (Int32.to_int linkedit.datasize land 0xffffffff)) + | Command command -> + Printf.printf "Command 0x%x\n" + (Int32.to_int command.cmd land 0xffffffff)) + macho.commands + + let parse_fixups macho input = + let fixup_matching = function + | LinkEdit linkedit when linkedit.cmd = lc_dyld_chained_fixups -> + Some (LinkEdit linkedit) + | _ -> None + in + let fixups_cmds = List.filter_map fixup_matching macho.commands in + match fixups_cmds with + | LinkEdit fixups_cmd :: [] -> ( + let start = Int32.to_int fixups_cmd.dataoff * 8 in + let length = Int32.to_int fixups_cmd.datasize * 8 in + let fixups_bytes = + Bitstring.takebits length (Bitstring.dropbits start input) + in + match%bitstring fixups_bytes with + | {| version: 32 : littleendian; + offset: 32 : littleendian; + imports_offset: 32 : littleendian; + symbols_offset : 32 : littleendian; + imports_count: 32 : littleendian; + imports_format : 32: littleendian; + symbols_format: 32 : littleendian + |} + -> + let imports_count_bits = Int32.to_int imports_count * 32 in + let imports_offset_bits = Int32.to_int imports_offset * 8 in + let symbols_offset_bits = Int32.to_int symbols_offset * 8 in + let imports_bytes = + Bitstring.takebits imports_count_bits + (Bitstring.dropbits imports_offset_bits fixups_bytes) + in + let symbols_bytes = + Bitstring.string_of_bitstring + (Bitstring.dropbits symbols_offset_bits fixups_bytes) + in + let extract_symbol_name start_offset = + let end_offset = + String.index_from symbols_bytes start_offset '\x00' + in + String.sub symbols_bytes start_offset (end_offset - start_offset) + in + let rec parse_fixups_chains chains = + match%bitstring chains with + | {|count: 32: littleendian; + chains_offsets_bytes:(Int32.to_int count)*32: bitstring; + _ : -1 : bitstring + |} -> + let rec parse_chains_offset chains_offsets_bytes = + match%bitstring chains_offsets_bytes with + | {|chain_offset:32:littleendian; rest:-1:bitstring|} -> (Int32.to_int chain_offset) :: parse_chains_offset rest + | {|_|} -> [] + in + let chains_offset = List.filter (fun offset -> offset <> 0) (parse_chains_offset chains_offsets_bytes) in + let rec parse_chains chain = + let () = Bitstring.hexdump_bitstring stdout chain in + match%bitstring chain with + | {| size:32:littleendian; + page_size:16:littleendian; + pointer_format:16:littleendian; + segment_offset:64:littleendian; + max_valid_pointer:32:littleendian; + page_count:16:littleendian; + page_start:16 * page_count: bitstring; + _:-1:bitstring + |} -> + let rec parse_pages pages_bytes = + match%bitstring pages_bytes with + | {|page:16:littleendian; rest:-1:bitstring|} -> (Int32.of_int page) :: parse_pages rest + | {|_|} -> [] + in + let pages = parse_pages page_start in + {size; page_size=(Int32.of_int page_size); pointer_format=(Int32.of_int pointer_format);segment_offset;max_valid_pointer;pages} + in + List.map (fun offset -> + let chains_bytes = Bitstring.dropbits (offset * 8) chains in + parse_chains chains_bytes + ) chains_offset + | {|_|} -> [] + in + let chains_bytes_len = ((Int32.to_int imports_offset) - (Int32.to_int offset)) * 8 in + let chains_bytes = Bitstring.takebits chains_bytes_len (Bitstring.dropbits ((Int32.to_int offset) * 8) fixups_bytes) in + let chains = parse_fixups_chains chains_bytes + in + let rec parse_imports imports_bytes fixups_bytes = + match%bitstring imports_bytes with + | {| lib_ordinal:8:littleendian; + weak_import:1:littleendian; + name_offset:23:littleendian,map(fun v -> v / 4); + rest : -1 : bitstring + |} + -> + let name = extract_symbol_name name_offset in + let import = { lib = Int32.of_int lib_ordinal; name } in + import :: parse_imports rest fixups_bytes + | {|_|} -> [] + in + (* we have chains and imports table *) + let fixups = List.map (fun chain -> + Printf.printf "segment: %x\n" (Int64.to_int chain.segment_offset); + let segment = Bitstring.dropbits ((Int64.to_int chain.segment_offset) * 8) input in + (* let segment = Bitstring.takebits (2 * 64) segment in *) + List.map (fun page -> + let page = Int32.to_int page in + let chain_walk_bytes = Bitstring.dropbits (page * 8) segment in + let rec chain_walk bytes = + match%bitstring bytes with + | {|value:64:littleendian; + rest:-1:bitstring|} -> + (* bitfield is annoying *) + (* decodes from right to left due to endianess and casting of uint64 *) + let bind1 = (Int64.to_int (Int64.shift_right_logical value 63)) = 1 in + let bind2 = (Int64.to_int (Int64.shift_right_logical value 63)) = -1 in + let bind = bind1 || bind2 in + let next = (Int64.to_int (Int64.shift_right_logical (Int64.shift_left value 1) (64 - 12))) in + let fixup = match bind with + | true -> + let ordinal = (Int64.logand value 0xFFFFFFL) in + let addend = (Int64.logand (Int64.shift_right value 24) 0xFFL) in + Bind { ordinal; addend } + | false -> + let target = (Int64.logand value 0xFFFFFFFFFL) in + let high8 = (Int64.logand (Int64.shift_right value 36) 0xFFL) in + Rebase { target; high8 } + in + match next with + | 0 -> [fixup] + | _ -> fixup :: chain_walk rest + in + chain_walk chain_walk_bytes + ) chain.pages + ) chains + in + let fixups = List.flatten (List.flatten fixups) in + let imports = parse_imports imports_bytes fixups_bytes in + let dylibs = List.fold_right (fun cmd out -> + match cmd with + | LoadDylib dylib -> dylib.name :: out + | _ -> out + ) macho.commands [] + in + (* List.iter (print_fixup) fixups; *) + Printf.printf "import len %x\n" (List.length imports); + Printf.printf "should be import len %x\n" (Int32.to_int imports_count); + List.map (function + | Bind bind -> + (* Printf.printf "try symbol nth %x\n" (Int64.to_int bind.ordinal); *) + let _import = List.nth_opt imports (Int64.to_int bind.ordinal) in + let x = match _import with + | Some import -> + let name = import.name in + (* Printf.printf "try lib nth %x\n" (Int32.to_int import.lib); *) + let lib = match (Int32.to_int import.lib) with + | 0 | 0xfd -> "@self" + | _ -> List.nth dylibs ((Int32.to_int import.lib) - 1) + in + { typ="bind"; name; lib } + | None -> {typ="bind?"; name="?"; lib="?"} + in x + | Rebase rebase -> { typ="rebase"; name="rebase bruh"; lib="rebase lib" } + ) (List.filter (function | Bind bind -> true | _ -> false) fixups) + | {|_|} -> []) +end + +open Macho;; + +let () = Printf.printf "\n" in +let input = Bitstring.bitstring_of_file "./research/cases/b" in +let header = Bitstring.takebits (32 * 7) input in +let macho : Macho.t = + match%bitstring header with + | {| 0xFEEDFACEl : 32 : littleendian; + cputype: 32 : littleendian; + cpusubtype: 32 : littleendian; + filetype: 32 : littleendian; + ncmds: 32 : littleendian; + sizeofcmds: 32 : littleendian; + flags: 32 : littleendian + |} + -> + let commands_size = Int32.to_int sizeofcmds * 8 in + let commands_bytes = + Bitstring.takebits commands_size (Bitstring.dropbits (32 * 7) input) + in + let commands = Macho.parse_commands commands_bytes in + { + magic = 0xFEEDFACEl; + cputype; + cpusubtype; + filetype; + ncmds; + sizeofcmds; + flags; + commands; + } + | {| 0xFEEDFACFl: 32 : littleendian; + cputype: 32 : littleendian; + cpusubtype: 32 : littleendian; + filetype: 32 : littleendian; + ncmds: 32 : littleendian; + sizeofcmds: 32 : littleendian; + flags: 32 : littleendian + |} + -> + let commands_size : int = Int32.to_int sizeofcmds * 8 in + let commands_bytes = + Bitstring.takebits commands_size (Bitstring.dropbits (32 * 8) input) + in + (* let () = Bitstring.hexdump_bitstring stdout commands_bytes in *) + let commands = Macho.parse_commands commands_bytes in + { + magic = 0xFEEDFACEl; + cputype; + cpusubtype; + filetype; + ncmds; + sizeofcmds; + flags; + commands; + } + (* | {| _ |} -> + "not a macho" *) + (* raise (ParseError "Input file is not a Macho") *) + (* parse_macho (Bitstring.dropbits (32 * 9) input) 64 *) +in +let () = Macho.summary_print macho in +let imports = Macho.parse_fixups macho input in +Printf.printf ""; +(* let () = Printf.printf "Symbols\n" in +List.iter (fun symbol -> Printf.printf " Type=%s; Name=%s; Lib=%s\n" symbol.typ symbol.name symbol.lib) imports *) diff --git a/macho-ocaml/dune-project b/macho-ocaml/dune-project new file mode 100644 index 0000000..dfbaa5c --- /dev/null +++ b/macho-ocaml/dune-project @@ -0,0 +1,28 @@ +(lang dune 3.7) + +(name macho) + +(generate_opam_files true) + +(source + (github username/reponame)) + +(authors "Author Name") + +(maintainers "Maintainer Name") + +(license LICENSE) + +(documentation https://url/to/documentation) + +(package + (name macho) + (synopsis "A short synopsis") + (description "A longer description") + (depends ocaml dune + ocaml-cstruct + bitstring) + (tags + (topics "to describe" your project))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/macho-ocaml/lib/dune b/macho-ocaml/lib/dune new file mode 100644 index 0000000..b7af58d --- /dev/null +++ b/macho-ocaml/lib/dune @@ -0,0 +1,2 @@ +(library + (name macho)) diff --git a/macho-ocaml/macho.opam b/macho-ocaml/macho.opam new file mode 100644 index 0000000..4e60af5 --- /dev/null +++ b/macho-ocaml/macho.opam @@ -0,0 +1,32 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short synopsis" +description: "A longer description" +maintainer: ["Maintainer Name"] +authors: ["Author Name"] +license: "LICENSE" +tags: ["topics" "to describe" "your" "project"] +homepage: "https://github.com/username/reponame" +doc: "https://url/to/documentation" +bug-reports: "https://github.com/username/reponame/issues" +depends: [ + "ocaml" + "dune" {>= "3.7"} + "ocaml-cstruct" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/username/reponame.git" diff --git a/macho-ocaml/test/dune b/macho-ocaml/test/dune new file mode 100644 index 0000000..c3756b7 --- /dev/null +++ b/macho-ocaml/test/dune @@ -0,0 +1,2 @@ +(test + (name macho)) diff --git a/macho-ocaml/test/macho.ml b/macho-ocaml/test/macho.ml new file mode 100644 index 0000000..e69de29