From 65278fbb3365d0ad691d0c28d971b99f332f682d Mon Sep 17 00:00:00 2001
From: nganhkhoa
Date: Thu, 26 Aug 2021 04:47:32 +0000
Subject: [PATCH] basic macho parsing

---
 osx/src/lib.rs   |   2 +
 osx/src/macho.rs | 385 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 387 insertions(+)
 create mode 100644 osx/src/lib.rs
 create mode 100644 osx/src/macho.rs

diff --git a/osx/src/lib.rs b/osx/src/lib.rs
new file mode 100644
index 0000000..3bf35e0
--- /dev/null
+++ b/osx/src/lib.rs
@@ -0,0 +1,2 @@
+mod macho;
+pub use macho::Macho;
diff --git a/osx/src/macho.rs b/osx/src/macho.rs
new file mode 100644
index 0000000..9bb79a6
--- /dev/null
+++ b/osx/src/macho.rs
@@ -0,0 +1,385 @@
+//! Minimal Mach-O parser: file header, segment/section tables and the
+//! code-signature load command. Every other load command is skipped.
+
+use std::io::{self, Read, Write, Seek, SeekFrom};
+use std::fmt;
+use std::error::Error;
+
+use byteorder::{ByteOrder, LittleEndian, BigEndian, ReadBytesExt};
+
+/// Extension of `Read` for the fixed-width, NUL-padded names used by Mach-O.
+trait ReadString: Read {
+    /// Reads exactly `len` bytes and returns the part before the first NUL
+    /// (everything when there is none) as a `String`.
+    ///
+    /// Fails when fewer than `len` bytes are left or the name is not UTF-8.
+    fn read_utf8(&mut self, len: usize) -> Result<String, Box<dyn Error>> {
+        let mut buf = vec![0u8; len];
+        self.read_exact(&mut buf)?;
+        // Cut at the first NUL in place instead of copying the prefix out.
+        let end = buf.iter().position(|&b| b == 0).unwrap_or(len);
+        buf.truncate(end);
+        Ok(String::from_utf8(buf)?)
+    }
+}
+
+impl<R: Read> ReadString for R {}
+
+const MH_MAGIC: u32 = 0xfeedface;
+const MH_CIGAM: u32 = MH_MAGIC.swap_bytes();
+const MH_MAGIC_64: u32 = 0xfeedfacf;
+const MH_CIGAM_64: u32 = MH_MAGIC_64.swap_bytes();
+
+const LC_SEGMENT: u32 = 0x1;
+const LC_SEGMENT_64: u32 = 0x19;
+const LC_CODE_SIGNATURE: u32 = 0x1d;
+
+// On-disk sizes in bytes: the sum of the field widths read by the parsers below.
+const LOAD_COMMAND_SIZE: u32 = 8;
+const SEGMENT_32_SIZE: u64 = 56;
+const SECTION_32_SIZE: u64 = 68;
+const SEGMENT_64_SIZE: u64 = 72;
+const SECTION_64_SIZE: u64 = 80;
+const LINKEDIT_DATA_SIZE: u64 = 16;
+
+/// Reads and discards exactly `count` bytes; `None` when the stream ends early.
+fn skip<R: Read>(cursor: &mut R, count: u64) -> Option<()> {
+    // Draining into a sink avoids allocating a buffer sized by untrusted input.
+    let skipped = io::copy(&mut cursor.by_ref().take(count), &mut io::sink()).ok()?;
+    if skipped == count {
+        Some(())
+    } else {
+        None
+    }
+}
+
+/// Bytes of a `cmdsize`-byte load command left after `parsed` bytes were decoded;
+/// `None` when the command claims to be smaller than what it must contain.
+fn trailing(cmdsize: u32, parsed: u64) -> Option<u64> {
+    u64::from(cmdsize).checked_sub(parsed)
+}
+
+/// Mach-O file header; `reserved` is read only for the 64-bit magics.
+#[derive(Debug)]
+pub struct Header {
+    magic: u32,
+    cputype: u32,
+    cpusubtype: u32,
+    filetype: u32,
+    ncmds: u32,
+    sizeofcmds: u32,
+    flags: u32,
+    reserved: u32,
+}
+
+impl Header {
+    /// Parses the header fields that follow the already-consumed magic number.
+    ///
+    /// The 64-bit magics carry one extra `reserved` word; it is reported as 0
+    /// for 32-bit files. Returns `None` when the stream ends early.
+    fn parse<E: ByteOrder, R: Read>(magic: u32, cursor: &mut R) -> Option<Header> {
+        let cputype = cursor.read_u32::<E>().ok()?;
+        let cpusubtype = cursor.read_u32::<E>().ok()?;
+        let filetype = cursor.read_u32::<E>().ok()?;
+        let ncmds = cursor.read_u32::<E>().ok()?;
+        let sizeofcmds = cursor.read_u32::<E>().ok()?;
+        let flags = cursor.read_u32::<E>().ok()?;
+        let reserved = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 {
+            cursor.read_u32::<E>().ok()?
+        } else {
+            0
+        };
+
+        Some(Header {
+            magic,
+            cputype,
+            cpusubtype,
+            filetype,
+            ncmds,
+            sizeofcmds,
+            flags,
+            reserved,
+        })
+    }
+}
+
+/// One section record of a segment; 32-bit values are widened to `u64`.
+#[derive(Debug)]
+pub struct Section {
+    sectname: String,
+    segname: String,
+    addr: u64,
+    size: u64,
+    offset: u32,
+    align: u32,
+    reloff: u32,
+    nreloc: u32,
+    flags: u32,
+    reserved1: u32,
+    reserved2: u32,
+    reserved3: u32,
+}
+
+impl Section {
+    /// Parses a 32-bit section record; `reserved3` does not exist there and is 0.
+    fn parse_32<E: ByteOrder, R: Read>(cursor: &mut R) -> Option<Section> {
+        Some(Section {
+            sectname: cursor.read_utf8(16).ok()?,
+            segname: cursor.read_utf8(16).ok()?,
+            addr: u64::from(cursor.read_u32::<E>().ok()?),
+            size: u64::from(cursor.read_u32::<E>().ok()?),
+            offset: cursor.read_u32::<E>().ok()?,
+            align: cursor.read_u32::<E>().ok()?,
+            reloff: cursor.read_u32::<E>().ok()?,
+            nreloc: cursor.read_u32::<E>().ok()?,
+            flags: cursor.read_u32::<E>().ok()?,
+            reserved1: cursor.read_u32::<E>().ok()?,
+            reserved2: cursor.read_u32::<E>().ok()?,
+            reserved3: 0,
+        })
+    }
+
+    /// Parses a 64-bit section record.
+    fn parse_64<E: ByteOrder, R: Read>(cursor: &mut R) -> Option<Section> {
+        Some(Section {
+            sectname: cursor.read_utf8(16).ok()?,
+            segname: cursor.read_utf8(16).ok()?,
+            addr: cursor.read_u64::<E>().ok()?,
+            size: cursor.read_u64::<E>().ok()?,
+            offset: cursor.read_u32::<E>().ok()?,
+            align: cursor.read_u32::<E>().ok()?,
+            reloff: cursor.read_u32::<E>().ok()?,
+            nreloc: cursor.read_u32::<E>().ok()?,
+            flags: cursor.read_u32::<E>().ok()?,
+            reserved1: cursor.read_u32::<E>().ok()?,
+            reserved2: cursor.read_u32::<E>().ok()?,
+            reserved3: cursor.read_u32::<E>().ok()?,
+        })
+    }
+}
+
+/// A segment load command together with its section table.
+#[derive(Debug)]
+pub struct Segment {
+    segname: String,
+    vmaddr: u64,
+    vmsize: u64,
+    fileoff: u64,
+    filesize: u64,
+    maxprot: u32,
+    initprot: u32,
+    flags: u32,
+    sections: Vec<Section>,
+}
+
+/// The `dataoff`/`datasize` pair carried by the code-signature command.
+#[derive(Debug)]
+pub struct Linkedit {
+    pub dataoff: u32,
+    pub datasize: u32,
+}
+
+/// A parsed load command; `Cmd(cmd, cmdsize)` stands for every skipped kind.
+#[derive(Debug)]
+pub enum LoadCommand {
+    Segment(Segment),
+    Codesignature(Linkedit),
+    Cmd(u32, u32),
+}
+
+impl LoadCommand {
+    /// Parses one load command and leaves `cursor` exactly `cmdsize` bytes
+    /// past its start, so the next command is read from the right place.
+    ///
+    /// Returns `None` when the stream ends early, a name is not UTF-8, or
+    /// `cmdsize` is too small for what the command has to contain.
+    fn parse<E: ByteOrder, R: Read>(cursor: &mut R) -> Option<LoadCommand> {
+        let cmd = cursor.read_u32::<E>().ok()?;
+        let cmdsize = cursor.read_u32::<E>().ok()?;
+
+        // `cmdsize` counts the two words just read, so anything smaller is bogus.
+        if cmdsize < LOAD_COMMAND_SIZE {
+            return None;
+        }
+
+        match cmd {
+            LC_SEGMENT => {
+                let segname = cursor.read_utf8(16).ok()?;
+                let vmaddr = u64::from(cursor.read_u32::<E>().ok()?);
+                let vmsize = u64::from(cursor.read_u32::<E>().ok()?);
+                let fileoff = u64::from(cursor.read_u32::<E>().ok()?);
+                let filesize = u64::from(cursor.read_u32::<E>().ok()?);
+                let maxprot = cursor.read_u32::<E>().ok()?;
+                let initprot = cursor.read_u32::<E>().ok()?;
+                let nsects = cursor.read_u32::<E>().ok()?;
+                let flags = cursor.read_u32::<E>().ok()?;
+                // Reject a section count that does not fit inside the command
+                // before reading any section.
+                let table = u64::from(nsects) * SECTION_32_SIZE;
+                let rest = trailing(cmdsize, SEGMENT_32_SIZE + table)?;
+                // A section that fails to parse fails the whole command
+                // instead of silently shortening the table.
+                let sections = (0..nsects)
+                    .map(|_| Section::parse_32::<E, R>(cursor))
+                    .collect::<Option<Vec<_>>>()?;
+                skip(cursor, rest)?;
+
+                Some(LoadCommand::Segment(Segment {
+                    segname,
+                    vmaddr,
+                    vmsize,
+                    fileoff,
+                    filesize,
+                    maxprot,
+                    initprot,
+                    flags,
+                    sections,
+                }))
+            }
+            LC_SEGMENT_64 => {
+                let segname = cursor.read_utf8(16).ok()?;
+                let vmaddr = cursor.read_u64::<E>().ok()?;
+                let vmsize = cursor.read_u64::<E>().ok()?;
+                let fileoff = cursor.read_u64::<E>().ok()?;
+                let filesize = cursor.read_u64::<E>().ok()?;
+                let maxprot = cursor.read_u32::<E>().ok()?;
+                let initprot = cursor.read_u32::<E>().ok()?;
+                let nsects = cursor.read_u32::<E>().ok()?;
+                let flags = cursor.read_u32::<E>().ok()?;
+                let table = u64::from(nsects) * SECTION_64_SIZE;
+                let rest = trailing(cmdsize, SEGMENT_64_SIZE + table)?;
+                let sections = (0..nsects)
+                    .map(|_| Section::parse_64::<E, R>(cursor))
+                    .collect::<Option<Vec<_>>>()?;
+                skip(cursor, rest)?;
+
+                Some(LoadCommand::Segment(Segment {
+                    segname,
+                    vmaddr,
+                    vmsize,
+                    fileoff,
+                    filesize,
+                    maxprot,
+                    initprot,
+                    flags,
+                    sections,
+                }))
+            }
+            LC_CODE_SIGNATURE => {
+                let rest = trailing(cmdsize, LINKEDIT_DATA_SIZE)?;
+                let dataoff = cursor.read_u32::<E>().ok()?;
+                let datasize = cursor.read_u32::<E>().ok()?;
+                skip(cursor, rest)?;
+
+                Some(LoadCommand::Codesignature(Linkedit { dataoff, datasize }))
+            }
+            _ => {
+                skip(cursor, trailing(cmdsize, u64::from(LOAD_COMMAND_SIZE))?)?;
+                Some(LoadCommand::Cmd(cmd, cmdsize))
+            }
+        }
+    }
+}
+
+impl fmt::Display for LoadCommand {
+    /// Writes a segment as its name followed by one `segname.sectname` line
+    /// per section; other commands are written on a single line.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            LoadCommand::Segment(segment) => {
+                writeln!(f, "{}", segment.segname)?;
+                // Propagate formatter errors instead of dropping them.
+                for section in &segment.sections {
+                    write!(f, " {}.{}\n", section.segname, section.sectname)?;
+                }
+                Ok(())
+            }
+            LoadCommand::Codesignature(linkedit) => {
+                write!(f, "Codesignature(dataoff=0x{:x}, datasize={})", linkedit.dataoff, linkedit.datasize)
+            }
+            LoadCommand::Cmd(cmd, cmdsize) => {
+                write!(f, "cmd=0x{:x} cmdsize={}", cmd, cmdsize)
+            }
+        }
+    }
+}
+
+/// A parsed Mach-O image: its header and load commands in file order.
+#[derive(Debug)]
+pub struct Macho {
+    pub header: Header,
+    pub commands: Vec<LoadCommand>,
+}
+
+/// Error returned when a Mach-O image cannot be parsed.
+#[derive(Debug)]
+pub struct MachoParseError {
+    message: String,
+}
+
+impl fmt::Display for MachoParseError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "MachoParseError(message={})", self.message)
+    }
+}
+
+impl Error for MachoParseError {}
+
+impl MachoParseError {
+    /// Boxes a new error carrying `msg`.
+    fn new(msg: &str) -> Box<dyn Error> {
+        Box::new(MachoParseError {
+            message: String::from(msg),
+        })
+    }
+}
+
+impl Macho {
+    /// Returns the first code-signature command, if the image has one.
+    pub fn codesignature(&self) -> Option<&Linkedit> {
+        self.commands.iter().find_map(|cmd| match cmd {
+            LoadCommand::Codesignature(linkedit) => Some(linkedit),
+            _ => None,
+        })
+    }
+
+    /// Reads the magic number and parses the image with the byte order it implies.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the magic cannot be read or is not one of the four Mach-O
+    /// magics, and on every error of [`Macho::parse`].
+    pub fn from<R: Read>(cursor: &mut R) -> Result<Self, Box<dyn Error>> {
+        let magic = cursor
+            .read_u32::<LittleEndian>()
+            .map_err(|_| MachoParseError::new("Cannot read magic"))?;
+        match magic {
+            MH_MAGIC | MH_MAGIC_64 => Self::parse::<LittleEndian, R>(magic, cursor),
+            // The magic came out byte-swapped, so the image is big-endian.
+            MH_CIGAM | MH_CIGAM_64 => Self::parse::<BigEndian, R>(magic, cursor),
+            // Any other value is rejected rather than parsed with a guessed byte order.
+            _ => Err(MachoParseError::new("Unknown magic")),
+        }
+    }
+
+    /// Parses the header and its `ncmds` load commands; `magic` must already
+    /// have been consumed from `cursor`.
+    ///
+    /// NOTE(review): the generic parameter list was lost in the mangled
+    /// source and is reconstructed here as `<E, R>`; confirm against callers.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the header or any of the load commands cannot be parsed.
+    pub fn parse<E: ByteOrder, R: Read>(magic: u32, cursor: &mut R) -> Result<Self, Box<dyn Error>> {
+        let header = Header::parse::<E, R>(magic, cursor)
+            .ok_or_else(|| MachoParseError::new("Cannot parse macho header"))?;
+        // A command that fails to parse is an error, not a shorter list.
+        let commands = (0..header.ncmds)
+            .map(|_| LoadCommand::parse::<E, R>(cursor))
+            .collect::<Option<Vec<_>>>()
+            .ok_or_else(|| MachoParseError::new("Cannot parse load command"))?;
+
+        Ok(Macho { header, commands })
+    }
+}