This commit is contained in:
2021-02-18 10:42:34 +07:00
parent 26c40f9a4c
commit 852129aec7
36 changed files with 6333 additions and 0 deletions

474
data/mach-o/binary.c Normal file
View File

@ -0,0 +1,474 @@
#include "binary.h"
#include "headers/loader.h"
#include "headers/nlist.h"
#include "headers/fat.h"
#include "read_dyld_info.h"
// Architecture preferred by b_prange_load_macho_nosyms() when it has to pick
// one slice out of a fat file: a slice matching desired_cputype scores higher,
// and one also matching desired_cpusubtype (or with subtype 0) scores highest.
const int desired_cputype = CPU_TYPE_ARM;
const int desired_cpusubtype = CPU_SUBTYPE_ARM_V7;
// Forward declarations; b_prange_load_macho() installs these two as
// binary->_sym and binary->_copy_syms.
static addr_t sym(const struct binary *binary, const char *name, int options);
static void copy_syms(const struct binary *binary, struct data_sym **syms, uint32_t *nsyms, int options);
static void do_load_commands(struct binary *binary) {
struct mach_header *hdr = b_mach_hdr(binary);
if(!prange_check(binary, (prange_t) {hdr, hdr->sizeofcmds})) {
die("not enough room for commands");
}
uint32_t nsegs = 0;
CMD_ITERATE(hdr, cmd) {
if(cmd + 1 > end || cmd > end - 1) {
die("sizeofcmds is not even");
}
if(cmd->cmdsize < sizeof(struct load_command)) {
die("tiny command");
}
if(cmd->cmdsize > (size_t) ((char *) end - (char *) cmd)) {
die("cmdsize overflows (%u)", cmd->cmdsize);
}
uint32_t required = 0;
switch(cmd->cmd) {
MACHO_SPECIALIZE(
case LC_SEGMENT_X:
required = sizeof(segment_command_x);
nsegs++;
break;
)
case LC_REEXPORT_DYLIB:
required = sizeof(struct dylib_command);
break;
case LC_SYMTAB:
required = sizeof(struct symtab_command);
break;
case LC_DYSYMTAB:
required = sizeof(struct dysymtab_command);
break;
case LC_DYLD_INFO:
case LC_DYLD_INFO_ONLY:
required = sizeof(struct dyld_info_command);
break;
case LC_ID_DYLIB:
required = sizeof(struct dylib_command);
break;
}
if(cmd->cmdsize < required) {
die("cmdsize (%u) too small for cmd (0x%x)", cmd->cmdsize, cmd->cmd);
}
}
if(nsegs > MAX_ARRAY(struct data_segment)) {
die("segment overflow");
}
binary->nsegments = nsegs;
struct data_segment *seg = binary->segments = malloc(sizeof(*binary->segments) * binary->nsegments);
CMD_ITERATE(hdr, cmd) {
switch(cmd->cmd) {
MACHO_SPECIALIZE(
case LC_SEGMENT_X: {
segment_command_x *scmd = (void *) cmd;
if((scmd->cmdsize - sizeof(*scmd)) / sizeof(section_x) < scmd->nsects) {
die("section overflow");
}
seg->file_range = (range_t) {binary, scmd->fileoff, scmd->filesize};
seg->vm_range = (range_t) {binary, scmd->vmaddr, scmd->vmsize};
seg->native_segment = cmd;
seg++;
break;
}
)
}
}
}
/*
 * Allocate binary->mach and fill it from the symbol-related load commands:
 *   - segment commands: the vmaddr of the segment with fileoff 0 becomes
 *                       export_baseaddr
 *   - LC_SYMTAB:        symtab/nsyms and strtab/strsize
 *   - LC_DYSYMTAB:      remembered in dysymtab
 *   - LC_DYLD_INFO*:    remembered in dyld_info, export trie range resolved
 * Afterwards the external (ext_*) and imported (imp_*) sub-tables are derived
 * from the dysymtab indices; without a dysymtab the whole symbol table is
 * treated as external.
 */
static void do_symbols(struct binary *binary) {
    binary->mach = calloc(1, sizeof(*binary->mach));
    if(!binary->mach) {
        die("out of memory");
    }
    binary->mach->hdr = b_mach_hdr(binary);
    CMD_ITERATE(b_mach_hdr(binary), cmd) {
        MACHO_SPECIALIZE(
        if(cmd->cmd == LC_SEGMENT_X) {
            segment_command_x *seg = (void *) cmd;
            if(seg->fileoff == 0) {
                binary->mach->export_baseaddr = seg->vmaddr;
            }
        }
        )
        if(cmd->cmd == LC_SYMTAB) {
            struct symtab_command *scmd = (void *) cmd;
            if(scmd->nsyms > MAX_ARRAY(struct data_sym) || scmd->nsyms > MAX_ARRAY(struct nlist_64)) {
                die("ridiculous number of symbols (%u)", scmd->nsyms);
            }
            binary->mach->nsyms = scmd->nsyms;
            binary->mach->strsize = scmd->strsize;
            binary->mach->symtab = rangeconv_off((range_t) {binary, scmd->symoff, scmd->nsyms * (b_pointer_size(binary) == 8 ? sizeof(struct nlist_64) : sizeof(struct nlist))}, MUST_FIND).start;
            binary->mach->strtab = rangeconv_off((range_t) {binary, scmd->stroff, scmd->strsize}, MUST_FIND).start;
            // An empty string table has no last byte to inspect; indexing
            // strsize - 1 would wrap around.  Every later lookup rejects
            // strx >= strsize, so an empty table cannot be read anyway.
            if(binary->mach->strsize && binary->mach->strtab[binary->mach->strsize - 1]) {
                die("string table does not end with \\0");
            }
        } else if(cmd->cmd == LC_DYSYMTAB) {
            binary->mach->dysymtab = (void *) cmd;
        } else if(cmd->cmd == LC_DYLD_INFO_ONLY || cmd->cmd == LC_DYLD_INFO) {
            struct dyld_info_command *dcmd = (void *) cmd;
            binary->mach->dyld_info = dcmd;
            binary->mach->export_trie = rangeconv_off((range_t) {binary, dcmd->export_off, dcmd->export_size}, MUST_FIND);
        }
    }
    const struct dysymtab_command *dc;
    if(binary->mach->symtab && (dc = binary->mach->dysymtab)) {
        // size of one symbol table entry for this image
        size_t size;
        MACHO_SPECIALIZE_POINTER_SIZE(binary, size = sizeof(nlist_x);)
// Accept an (index, count) pair only if it stays inside the symbol table;
// otherwise warn and leave the corresponding sub-table empty (calloc'd).
#define do_it(isym, nsym, x_symtab, x_nsyms) \
        if(dc->isym <= binary->mach->nsyms && dc->nsym <= binary->mach->nsyms - dc->isym && dc->nsym <= MAX_ARRAY(struct nlist_64) && dc->nsym <= MAX_ARRAY(struct data_sym)) { \
            binary->mach->x_symtab = binary->mach->symtab + dc->isym * size; \
            binary->mach->x_nsyms = dc->nsym; \
        } else { \
            fprintf(stderr, "warning: bad %s/%s (%u, %u)\n", #isym, #nsym, dc->isym, dc->nsym); \
        }
        do_it(iextdefsym, nextdefsym, ext_symtab, ext_nsyms)
        do_it(iundefsym, nundefsym, imp_symtab, imp_nsyms)
#undef do_it
    } else {
        binary->mach->ext_symtab = binary->mach->symtab;
        binary->mach->ext_nsyms = binary->mach->nsyms;
    }
}
/*
 * Load a Mach-O image from the in-memory range `pr` (header at `offset`)
 * including its symbol information, then install the Mach-O symbol lookup
 * hooks on `binary`.
 */
void b_prange_load_macho(struct binary *binary, prange_t pr, size_t offset, const char *name) {
    // headers and segments first, symbol tables second
    b_prange_load_macho_nosyms(binary, pr, offset, name);
    do_symbols(binary);

    // hooks used by the generic symbol API
    binary->_copy_syms = copy_syms;
    binary->_sym = sym;
}
/*
 * Load a Mach-O image from the in-memory range `pr` without touching symbols.
 *
 * binary: structure to fill (valid, header_offset, valid_range, pointer_size
 *         for thin files, cputype, cpusubtype, segments).
 * pr:     bytes of the file.
 * offset: position of the Mach-O header inside `pr`; must be 0 for fat files.
 * name:   bound to _arg for the duration of the function -- presumably picked
 *         up by die() to identify the input (TODO confirm against die()).
 *
 * For a fat file the best-scoring slice (see desired_cputype /
 * desired_cpusubtype) becomes valid_range.  All errors are fatal via die().
 */
void b_prange_load_macho_nosyms(struct binary *binary, prange_t pr, size_t offset, const char *name) {
#define _arg name
    binary->valid = true;
    binary->header_offset = offset;
    // The bytes remaining after `offset` must hold a mach_header.  The
    // subtraction has to be pr.size - offset: the reverse wraps to a huge
    // value whenever offset < pr.size and the check could never fail.
    if(offset >= pr.size || pr.size - offset < sizeof(struct mach_header)) {
        die("not enough room");
    }
    struct mach_header *hdr = pr.start + offset;
    if(hdr->magic == MH_MAGIC) {
        // thin file
        binary->valid_range = pr;
        binary->pointer_size = 4;
    } else if(ADDR64 && hdr->magic == MH_MAGIC_64) {
        binary->valid_range = pr;
        binary->pointer_size = 8;
    } else if(hdr->magic == FAT_CIGAM) {
        if(offset) die("fat, offset != 0");
        struct fat_header *fathdr = (void *) hdr;
        struct fat_arch *arch = (void *)(fathdr + 1);
        // fat structures are stored in the opposite byte order, hence SWAP32
        uint32_t nfat_arch = SWAP32(fathdr->nfat_arch);
        if(nfat_arch > (pr.size - sizeof(struct fat_header)) / sizeof(struct fat_arch)) {
            die("fat header is too small");
        }
        if(!nfat_arch) {
            die("fat file is empty");
        }
        prange_t fat_pr = {NULL, 0}; /* no, gcc, it won't be used uninitialized */
        int highest_score = 0;
        while(nfat_arch--) {
            // 0: other cpu type, 1: cpu type matches, 2: subtype matches too
            int score = 0;
            if(desired_cputype != CPU_TYPE_ANY && SWAP32(arch->cputype) == desired_cputype) {
                score = 1;
                if(arch->cpusubtype == 0 || (desired_cpusubtype != 0 && SWAP32(arch->cpusubtype) == desired_cpusubtype)) {
                    score = 2;
                }
            }
            // >= means that among equally good slices the last one wins
            if(score >= highest_score) {
                highest_score = score;
                uint32_t fat_offset = SWAP32(arch->offset);
                if(fat_offset >= pr.size || pr.size - fat_offset < sizeof(struct mach_header)) {
                    die("fat_offset too big");
                }
                fat_pr = (prange_t) {pr.start + fat_offset, pr.size - fat_offset};
            }
            arch++;
        }
        binary->valid_range = fat_pr;
        // NOTE(review): this branch neither checks the magic of the selected
        // slice nor sets binary->pointer_size -- confirm that the caller or
        // binary initialisation covers both.
    } else if(hdr->magic == MH_CIGAM || hdr->magic == MH_CIGAM_64 || hdr->magic == FAT_MAGIC) {
        die("wrong endian");
    } else {
        die("(%08x) what is this I don't even", hdr->magic);
    }
    binary->cputype = b_mach_hdr(binary)->cputype;
    binary->cpusubtype = b_mach_hdr(binary)->cpusubtype;
    do_load_commands(binary);
#undef _arg
}
static inline struct data_sym convert_nlist(const struct binary *binary, const void *nl_, int options) {
struct data_sym result;
MACHO_SPECIALIZE_POINTER_SIZE(binary,
const nlist_x *nl = nl_;
uint32_t strx = nl->n_un.n_strx;
if(strx >= binary->mach->strsize) {
die("insane strx: %u", strx);
}
result.name = binary->mach->strtab + strx;
result.address = nl->n_value;
if((options & TO_EXECUTE) && (nl->n_desc & N_ARM_THUMB_DEF)) {
result.address |= 1;
}
)
return result;
}
void *b_macho_nth_symbol(const struct binary *binary, uint32_t n) {
if(!binary->mach->symtab) {
die("no symbol table");
}
if(n >= binary->mach->nsyms) {
die("sym too high: %u", n);
}
MACHO_SPECIALIZE_POINTER_SIZE(binary,
nlist_x *nl = binary->mach->symtab + n * sizeof(*nl);
if((uint32_t) nl->n_un.n_strx >= binary->mach->strsize) {
die("insane strx: %d", (int) nl->n_un.n_strx);
}
return nl;
)
}
static addr_t sym_nlist(const struct binary *binary, const char *name, int options) {
// I stole dyld's codez
const struct nlist *base = binary->mach->ext_symtab;
for(uint32_t n = binary->mach->ext_nsyms; n > 0; n /= 2) {
const struct nlist *pivot = base + n/2;
struct data_sym ds = convert_nlist(binary, pivot, options);
int cmp = strcmp(name, ds.name);
if(cmp == 0) {
return ds.address;
} else if(cmp > 0) {
base = pivot + 1;
n--;
}
}
for(unsigned int i = 0; i < binary->nreexports; i++) {
addr_t result;
if(result = b_sym(&binary->reexports[i], name, options)) {
return result;
}
}
return 0;
}
/*
 * Resolve a symbol by walking the export trie, starting at node `ptr`.
 *
 * start/end delimit the whole trie; `name0` is the full symbol name and
 * `name` the part of it not yet matched on the way to this node.  Returns the
 * resolved address, or 0 when the name is absent or has a resolver.
 *
 * read_int / read_uleb128 / read_cstring are defined outside this file
 * (presumably read_dyld_info.h); they are assumed to advance `ptr` and to
 * bounds-check against `end` -- TODO confirm.
 */
static addr_t trie_recurse(const struct binary *binary, void *ptr, char *start, char *end, const char *name0, const char *name, int options) {
if(start == end) return 0;
// A non-zero size means this node carries export information.
// NOTE(review): the size is read as a single byte and the payload is parsed
// field by field rather than skipped by that size, so a payload holding more
// than the fields read below would leave ptr short of the child list -- confirm.
uint8_t terminal_size = read_int(&ptr, end, uint8_t);
if(terminal_size) {
uint32_t flags = read_uleb128(&ptr, end);
// NOTE(review): stored in 32 bits; larger values would be truncated.
uint32_t address = read_uleb128(&ptr, end);
uint32_t resolver = 0;
if(flags & 0x10) {
resolver = read_uleb128(&ptr, end);
}
// the whole name has been consumed: this node is the requested symbol
if(!name[0]) {
if(resolver) {
fprintf(stderr, "trie_recurse: %s has a resolver; returning failure\n", name0);
return 0;
}
if(flags & 8) {
// indirect definition
// here `address` is a 1-based index into binary->reexports; 0 wraps
// around on the decrement and is rejected by the range check
address--;
if(address >= binary->nreexports) {
die("invalid sub-library %d", address);
}
return b_sym(&binary->reexports[address], name0, options);
}
// unless the caller wants an executable address, clear bit 0 on ARM
if(binary->cputype == CPU_TYPE_ARM && !(options & TO_EXECUTE)) {
address &= ~1u;
}
return ((addr_t) address) + binary->mach->export_baseaddr;
}
}
// Children: each is an edge label (C string) followed by the child's offset
// from `start`.
uint8_t child_count = read_int(&ptr, end, uint8_t);
while(child_count--) {
const char *name2 = name;
char c;
while(1) {
c = read_int(&ptr, end, char);
if(!c) {
// the whole label matched a prefix of the remaining name: descend
// NOTE(review): recursion depth is not bounded; offsets that form a cycle
// would presumably never terminate -- confirm.
uint64_t offset = read_uleb128(&ptr, end);
if(offset >= (size_t) (end - start)) die("invalid child offset");
return trie_recurse(binary, start + offset, start, end, name0, name2, options);
}
if(c != *name2++) {
break;
}
}
// skip the rest
read_cstring(&ptr, end);
read_uleb128(&ptr, end);
}
return 0;
}
static addr_t sym_trie(const struct binary *binary, const char *name, int options) {
return trie_recurse(binary,
binary->mach->export_trie.start,
binary->mach->export_trie.start,
(char *)binary->mach->export_trie.start + binary->mach->export_trie.size,
name,
name,
options);
}
/*
 * Linear search through the complete symbol table for an entry called
 * `name`.  Returns its address (as produced by convert_nlist), or 0 if no
 * entry matches.  Dies if the image has no symbol table.
 */
static addr_t sym_private(const struct binary *binary, const char *name, int options) {
    if(!binary->mach->symtab) {
        die("we wanted %s but there is no symbol table", name);
    }
    MACHO_SPECIALIZE_POINTER_SIZE(binary,
        const nlist_x *cur = binary->mach->symtab;
        const nlist_x *stop = cur + binary->mach->nsyms;
        for(; cur != stop; cur++) {
            struct data_sym ds = convert_nlist(binary, cur, options);
            if(strcmp(ds.name, name) == 0) {
                return ds.address;
            }
        }
    )
    return 0;
}
/*
 * Find the symbol-pointer slot through which the image imports `name`.
 *
 * Scans every lazy / non-lazy symbol pointer section; each slot has a
 * matching entry in the indirect symbol table that names the symbol it
 * refers to.  Returns the address of the first slot bound to `name`, or 0 if
 * there is none.
 */
static addr_t sym_imported(const struct binary *binary, const char *name, __unused int options) {
    // most of this function is copied and pasted from link.c :$
    CMD_ITERATE(b_mach_hdr(binary), cmd) {
        MACHO_SPECIALIZE(
        if(cmd->cmd == LC_SEGMENT_X) {
            segment_command_x *seg = (void *) cmd;
            section_x *sect = (void *) (seg + 1);
            for(uint32_t i = 0; i < seg->nsects; i++, sect++) {
                uint8_t type = sect->flags & SECTION_TYPE;
                if(type != S_NON_LAZY_SYMBOL_POINTERS && type != S_LAZY_SYMBOL_POINTERS) continue;
                if(!binary->mach->dysymtab) {
                    die("we wanted %s but there is no dysymtab", name);
                }
                // One indirect-table entry per pointer slot; the slot size is
                // the pointer size of this specialization (4 or 8), not a
                // fixed 4.
                uint64_t nslots = sect->size / pointer_size_x;
                uint32_t indirect_table_offset = sect->reserved1;
                uint32_t *indirect = rangeconv_off((range_t) {binary, (addr_t) (binary->mach->dysymtab->indirectsymoff + indirect_table_offset*sizeof(uint32_t)), nslots * sizeof(uint32_t)}, MUST_FIND).start;
                for(uint64_t slot = 0; slot < nslots; slot++) {
                    uint32_t symidx = indirect[slot];
                    if(symidx == INDIRECT_SYMBOL_LOCAL || symidx == INDIRECT_SYMBOL_ABS) continue;
                    nlist_x *nl = b_macho_nth_symbol(binary, symidx);
                    if(!strcmp(binary->mach->strtab + nl->n_un.n_strx, name)) {
                        return sect->addr + pointer_size_x * slot;
                    }
                }
            }
        }
        )
    }
    return 0;
}
/*
 * Symbol lookup hook installed as binary->_sym.  Dispatches on `options`:
 * PRIVATE_SYM searches the whole symbol table, IMPORTED_SYM searches the
 * import pointer sections, otherwise the export trie is used when present
 * and the external symbol table when not.  MUST_FIND is stripped before the
 * request is passed on.
 */
static addr_t sym(const struct binary *binary, const char *name, int options) {
    const int passed_on = options & ~MUST_FIND;
    if(options & PRIVATE_SYM) {
        return sym_private(binary, name, passed_on);
    }
    if(options & IMPORTED_SYM) {
        return sym_imported(binary, name, passed_on);
    }
    if(binary->mach->export_trie.start) {
        return sym_trie(binary, name, passed_on);
    }
    return sym_nlist(binary, name, passed_on);
}
static void copy_syms(const struct binary *binary, struct data_sym **syms, uint32_t *nsyms, int options) {
uint32_t n;
const void *nl;
size_t size;
MACHO_SPECIALIZE_POINTER_SIZE(binary, size = sizeof(nlist_x);)
bool can_be_zero = false;
if(options & PRIVATE_SYM) {
nl = binary->mach->symtab;
n = binary->mach->nsyms;
} else if(options & IMPORTED_SYM) {
nl = binary->mach->imp_symtab;
n = binary->mach->imp_nsyms;
can_be_zero = true;
} else {
nl = binary->mach->ext_symtab;
n = binary->mach->ext_nsyms;
}
struct data_sym *s = *syms = malloc(sizeof(struct data_sym) * n);
for(uint32_t i = 0; i < n; i++) {
*s = convert_nlist(binary, nl, options);
nl += size;
if(can_be_zero || s->address) s++;
}
*nsyms = s - *syms;
}
/*
 * Return the range of the first segment whose name equals `segname`
 * (compared over at most 16 characters).  The range starts at the segment's
 * vmaddr and is filesize bytes long.  Dies if no segment matches.
 */
range_t b_macho_segrange(const struct binary *binary, const char *segname) {
    CMD_ITERATE(b_mach_hdr(binary), cmd) {
        MACHO_SPECIALIZE(
        if(cmd->cmd == LC_SEGMENT_X) {
            segment_command_x *match = (void *) cmd;
            if(strncmp(match->segname, segname, 16) == 0) {
                range_t found = {binary, match->vmaddr, match->filesize};
                return found;
            }
        }
        )
    }
    die("no such segment %s", segname);
}
/*
 * Return the address range (addr, size) of section `sectname` inside segment
 * `segname`; both names are compared over at most 16 characters.  Dies if no
 * such section exists.
 */
range_t b_macho_sectrange(const struct binary *binary, const char *segname, const char *sectname) {
    CMD_ITERATE(b_mach_hdr(binary), cmd) {
        MACHO_SPECIALIZE(
        if(cmd->cmd == LC_SEGMENT_X) {
            segment_command_x *seg = (void *) cmd;
            if(!strncmp(seg->segname, segname, 16)) {
                // the section headers follow the segment command directly
                section_x *sect = (void *) (seg + 1);
                for(uint32_t i = 0; i < seg->nsects; i++) {
                    if(!strncmp(sect[i].sectname, sectname, 16)) {
                        // report the section that matched, not the first one
                        return (range_t) {binary, sect[i].addr, sect[i].size};
                    }
                }
            }
        }
        )
    }
    die("no such section %s,%s", segname, sectname);
}
/*
 * Load the Mach-O file `filename` (including symbols) into `binary`.  The
 * file contents come from load_file(); the header is expected at offset 0.
 */
void b_load_macho(struct binary *binary, const char *filename) {
    // no `return <expr>;` here: ISO C does not allow it in a void function
    b_prange_load_macho(binary, load_file(filename, true, NULL), 0, filename);
}
addr_t b_macho_reloc_base(const struct binary *binary) {
// copying dyld's behavior
CMD_ITERATE(b_mach_hdr(binary), cmd) {
MACHO_SPECIALIZE(
if(cmd->cmd == LC_SEGMENT_X) {
segment_command_x *seg = (void *) cmd;
if(b_mach_hdr(binary)->cputype != CPU_TYPE_X86_64 || (seg->initprot & PROT_WRITE)) {
return seg->vmaddr;
}
}
)
}
die("no segments");
}
/*
 * Return the string stored `offset` bytes into load command `cmd` (an lc_str
 * reference).  Dies unless the offset lies inside the command and the string
 * is NUL-terminated before the end of the command.
 */
const char *convert_lc_str(const struct load_command *cmd, uint32_t offset) {
    // validate the offset before forming a pointer or a remaining size from it
    if(offset >= cmd->cmdsize) {
        die("bad lc_str");
    }
    const char *ret = ((const char *) cmd) + offset;
    size_t size = cmd->cmdsize - offset;
    // strnlen() == size means no terminator inside the command
    if(strnlen(ret, size) == size) {
        die("bad lc_str");
    }
    return ret;
}

76
data/mach-o/binary.h Normal file
View File

@ -0,0 +1,76 @@
#pragma once
#include "../binary.h"
#include "headers/loader.h"
// Loop header that visits every load command following `hdr`.
//   hdr: struct mach_header * (evaluated several times)
//   cmd: name of the loop variable to declare (struct load_command *)
// The loop also declares a variable named `end`, one past the sizeofcmds
// area, which the loop body may use.  The first command is located right
// after the header, one uint32_t further when ADDR64 is set and the low bit
// of the magic is 1 (presumably the 64-bit magic, whose header has one more
// word -- confirm against loader.h).  The step is cmd->cmdsize and nothing is
// validated here, so commands have to be checked before they can be trusted.
#define CMD_ITERATE(hdr, cmd) \
for(struct load_command *cmd = \
(struct load_command *) ((uint32_t *) ((hdr) + 1) + (ADDR64 ? ((hdr)->magic & 1) : 0)), \
*end = (struct load_command *) ((char *) cmd + (hdr)->sizeofcmds); \
cmd < end; \
cmd = (struct load_command *) ((char *) cmd + cmd->cmdsize))
// Poor man's templates for code that differs only between the 32-bit and the
// 64-bit Mach-O structures.  Inside a specialized block the names
// segment_command_x, section_x and nlist_x are typedefs for the matching
// structure, and LC_SEGMENT_X / pointer_size_x are constants: each block
// typedefs a char array whose length is the constant, and the two macros
// below read it back with sizeof.
#define LC_SEGMENT_X sizeof(_spec_LC_SEGMENT_X)
#define pointer_size_x sizeof(_spec_pointer_size_x)
// Wraps `text` in a block that starts with the typedefs for one variant.
#define _MACHO_SPECIALIZE(_LC_SEGMENT_X, _segment_command_x, _section_x, _nlist_x, _pointer_size_x, text...) { \
typedef struct _segment_command_x segment_command_x; \
typedef struct _section_x section_x; \
typedef struct _nlist_x nlist_x; \
typedef char _spec_LC_SEGMENT_X[_LC_SEGMENT_X]; \
typedef char _spec_pointer_size_x[_pointer_size_x]; \
text \
}
#define _MACHO_SPECIALIZE_64(text...) _MACHO_SPECIALIZE(LC_SEGMENT_64, segment_command_64, section_64, nlist_64, 8, text)
#define _MACHO_SPECIALIZE_32(text...) _MACHO_SPECIALIZE(LC_SEGMENT, segment_command, section, nlist, 4, text)
// MACHO_SPECIALIZE(text): with ADDR64, emits `text` twice, the 64-bit block
// followed by the 32-bit block; `text` is expected to test LC_SEGMENT_X
// itself.  Because both blocks run one after the other, a jump out of the
// first block (e.g. `continue` for an enclosing loop) also skips the second.
// MACHO_SPECIALIZE_POINTER_SIZE(binary, text): emits both variants but runs
// only the one matching b_pointer_size(binary).
// Without ADDR64 both macros reduce to the 32-bit variant.
#if ADDR64
#define MACHO_SPECIALIZE(text...) _MACHO_SPECIALIZE_64(text) _MACHO_SPECIALIZE_32(text)
#define MACHO_SPECIALIZE_POINTER_SIZE(binary, text...) \
if(b_pointer_size(binary) == 8) _MACHO_SPECIALIZE_64(text) else _MACHO_SPECIALIZE_32(text)
#else
#define MACHO_SPECIALIZE(text...) _MACHO_SPECIALIZE_32(text)
#define MACHO_SPECIALIZE_POINTER_SIZE(binary, text...) _MACHO_SPECIALIZE_32(text)
#endif
// Mach-O specific state attached to a struct binary (binary->mach); filled in
// by do_symbols() in binary.c.
struct mach_binary {
// this is unnecessary, don't use it
struct mach_header *hdr;
// this stuff is _all_ symbols...
void *symtab; // either nlist or nlist_64
uint32_t nsyms;
// for b_sym (external stuff)
// sub-ranges of symtab selected by the dysymtab's iextdefsym/nextdefsym
// (ext_*) and iundefsym/nundefsym (imp_*); the entries are nlist_64 for
// 64-bit images despite the declared type
struct nlist *ext_symtab, *imp_symtab;
uint32_t ext_nsyms, imp_nsyms;
// alternatively
// LC_DYLD_INFO(_ONLY) command and the export trie it points to
struct dyld_info_command *dyld_info;
prange_t export_trie;
// vmaddr of the segment whose fileoff is 0; added to trie addresses
addr_t export_baseaddr;
// string table from LC_SYMTAB and its size in bytes
char *strtab;
uint32_t strsize;
// the LC_DYSYMTAB command, or NULL if the image has none
const struct dysymtab_command *dysymtab;
};
__BEGIN_DECLS
// Returns the Mach-O header of `binary`: header_offset bytes into valid_range.
static inline struct mach_header *b_mach_hdr(const struct binary *binary) {
    char *range_base = (char *) binary->valid_range.start;
    return (struct mach_header *) (range_base + binary->header_offset);
}
// Range (vmaddr, filesize) of the named segment; dies if it does not exist.
__attribute__((pure)) range_t b_macho_segrange(const struct binary *binary, const char *segname);
// Range of the named section inside the named segment; dies if it does not exist.
__attribute__((pure)) range_t b_macho_sectrange(const struct binary *binary, const char *segname, const char *sectname);
// Load a Mach-O image from memory; `offset` is the position of the header in
// `range`.  The _nosyms variant skips symbol tables and lookup hooks.
void b_prange_load_macho(struct binary *binary, prange_t range, size_t offset, const char *name);
void b_prange_load_macho_nosyms(struct binary *binary, prange_t range, size_t offset, const char *name);
// Load a Mach-O image (with symbols) from a file.
void b_load_macho(struct binary *binary, const char *filename);
// Pointer to the n-th entry of the full symbol table (struct nlist or
// struct nlist_64, depending on the image); dies if n is out of range.
void *b_macho_nth_symbol(const struct binary *binary, uint32_t n);
// Address that relocations are relative to (see the definition for the rule).
addr_t b_macho_reloc_base(const struct binary *binary);
// String located `offset` bytes into a load command; dies if it is not
// terminated inside the command.
const char *convert_lc_str(const struct load_command *cmd, uint32_t offset);
__END_DECLS

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/*
* Relocation types used in the arm implementation. Relocation entries for
* things other than instructions use the same generic relocation as described
* in <mach-o/reloc.h> and their r_type is ARM_RELOC_VANILLA, one of the
* *_SECTDIFF or the *_PB_LA_PTR types. The rest of the relocation types are
* for instructions. Since they are for instructions the r_address field
* indicates the 32 bit instruction that the relocation is to be performed on.
*/
/* No enumerator has an explicit value, so they number 0..7 in the order listed. */
enum reloc_type_arm
{
ARM_RELOC_VANILLA, /* generic relocation as described above */
ARM_RELOC_PAIR, /* the second relocation entry of a pair */
ARM_RELOC_SECTDIFF, /* a PAIR follows with subtract symbol value */
ARM_RELOC_LOCAL_SECTDIFF, /* like ARM_RELOC_SECTDIFF, but the symbol
referenced was local. */
ARM_RELOC_PB_LA_PTR,/* prebound lazy pointer */
ARM_RELOC_BR24, /* 24 bit branch displacement (to a word address) */
ARM_THUMB_RELOC_BR22, /* 22 bit branch displacement (to a half-word
address) */
ARM_THUMB_32BIT_BRANCH, /* obsolete - a thumb 32-bit branch instruction
possibly needing page-spanning branch workaround */
};

63
data/mach-o/headers/fat.h Normal file
View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#ifndef _MACH_O_FAT_H_
#define _MACH_O_FAT_H_
/*
* This header file describes the structures of the file format for "fat"
* architecture specific file (wrapper design). At the beginning of the file
* there is one fat_header structure followed by a number of fat_arch
* structures. For each architecture in the file, specified by a pair of
* cputype and cpusubtype, the fat_header describes the file offset, file
* size and alignment in the file of the architecture specific member.
* The padded bytes in the file to place each member on its specific alignment
* are defined to be read as zeros and can be left as "holes" if the file system
* can support them as long as they read as zeros.
*
* All structures defined here are always written and read to/from disk
* in big-endian order.
*/
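/*
* <stdint.h> is needed here for the fixed-width uint32_t type. In this copy
* the cputype and cpusubtype fields are declared as plain int, so
* <mach/machine.h> (which provides cpu_type_t and cpu_subtype_t and the
* constants for their possible values) is not included.
*/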
#include <stdint.h>
/*
 * Magic number of a fat file.  FAT_CIGAM is the same value byte-swapped,
 * i.e. what a host of the opposite byte order reads from the file.
 */
#define FAT_MAGIC 0xcafebabe
#define FAT_CIGAM 0xbebafeca /* NXSwapLong(FAT_MAGIC) */
/* Header at the very start of a fat file; nfat_arch fat_arch records follow it. */
struct fat_header {
uint32_t magic; /* FAT_MAGIC */
uint32_t nfat_arch; /* number of structs that follow */
};
/* Describes one architecture-specific member of a fat file. */
struct fat_arch {
int cputype; /* cpu specifier (int) */
int cpusubtype; /* machine specifier (int) */
uint32_t offset; /* file offset to this object file */
uint32_t size; /* size of this object file */
uint32_t align; /* alignment as a power of 2 */
};
#endif /* _MACH_O_FAT_H_ */

1340
data/mach-o/headers/loader.h Normal file

File diff suppressed because it is too large Load Diff

302
data/mach-o/headers/nlist.h Normal file
View File

@ -0,0 +1,302 @@
/*
* Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#ifndef _MACHO_NLIST_H_
#define _MACHO_NLIST_H_
/* $NetBSD: nlist.h,v 1.5 1994/10/26 00:56:11 cgd Exp $ */
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nlist.h 8.2 (Berkeley) 1/21/94
*/
#include <stdint.h>
/*
* Format of a symbol table entry of a Mach-O file for 32-bit architectures.
* Modified from the BSD format. The modifications from the original format
* were changing n_other (an unused field) to n_sect and the addition of the
* N_SECT type. These modifications are required to support symbols in a larger
* number of sections not just the three sections (text, data and bss) in a BSD
* file.
*/
/*
 * Symbol table entry for 32-bit images.  Under __LP64__ the n_name member is
 * compiled out, leaving the 32-bit n_strx as the only member of the union.
 * Note that n_strx is signed here but unsigned in struct nlist_64.
 */
struct nlist {
union {
#ifndef __LP64__
char *n_name; /* for use when in-core */
#endif
int32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
int16_t n_desc; /* see <mach-o/stab.h> */
uint32_t n_value; /* value of this symbol (or stab offset) */
};
/*
* This is the symbol table entry structure for 64-bit architectures.
*/
/*
 * Same fields as struct nlist, with an unsigned n_strx and n_desc and a
 * 64-bit n_value.
 */
struct nlist_64 {
union {
uint32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
uint16_t n_desc; /* see <mach-o/stab.h> */
uint64_t n_value; /* value of this symbol (or stab offset) */
};
/*
* Symbols with an index into the string table of zero (n_un.n_strx == 0) are
* defined to have a null, "", name. Therefore all string indexes to non null
* names must not have a zero string index. This is a bit of historical
* information that has never been well documented.
*/
/*
* The n_type field really contains four fields:
* unsigned char N_STAB:3,
* N_PEXT:1,
* N_TYPE:3,
* N_EXT:1;
* which are used via the following masks.
*/
/* The four masks below partition the 8 bits of n_type (0xe0 | 0x10 | 0x0e | 0x01). */
#define N_STAB 0xe0 /* if any of these bits set, a symbolic debugging entry */
#define N_PEXT 0x10 /* private external symbol bit */
#define N_TYPE 0x0e /* mask for the type bits */
#define N_EXT 0x01 /* external symbol bit, set for external symbols */
/*
* Only symbolic debugging entries have some of the N_STAB bits set and if any
* of these bits are set then it is a symbolic debugging entry (a stab). In
* which case then the values of the n_type field (the entire field) are given
* in <mach-o/stab.h>
*/
/*
* Values for N_TYPE bits of the n_type field.
* Compare after masking, e.g. (n_type & N_TYPE) == N_SECT.
*/
#define N_UNDF 0x0 /* undefined, n_sect == NO_SECT */
#define N_ABS 0x2 /* absolute, n_sect == NO_SECT */
#define N_SECT 0xe /* defined in section number n_sect */
#define N_PBUD 0xc /* prebound undefined (defined in a dylib) */
#define N_INDR 0xa /* indirect */
/*
* If the type is N_INDR then the symbol is defined to be the same as another
* symbol. In this case the n_value field is an index into the string table
* of the other symbol's name. When the other symbol is defined then they both
* take on the defined type and value.
*/
/*
* If the type is N_SECT then the n_sect field contains an ordinal of the
* section the symbol is defined in. The sections are numbered from 1 and
* refer to sections in order they appear in the load commands for the file
* they are in. This means the same ordinal may very well refer to different
* sections in different files.
*
* The n_value field for all symbol table entries (including N_STAB's) gets
* updated by the link editor based on the value of its n_sect field and where
* the section n_sect references gets relocated. If the value of the n_sect
* field is NO_SECT then its n_value field is not changed by the link editor.
*/
#define NO_SECT 0 /* symbol is not in any section */
#define MAX_SECT 255 /* 1 thru 255 inclusive */
/*
* Common symbols are represented by undefined (N_UNDF) external (N_EXT) types
* whose values (n_value) are non-zero. In which case the value of the n_value
* field is the size (in bytes) of the common symbol. The n_sect field is set
* to NO_SECT. The alignment of a common symbol may be set as a power of 2
* between 2^1 and 2^15 as part of the n_desc field using the macros below. If
* the alignment is not set (a value of zero) then natural alignment based on
* the size is used.
*
* The alignment occupies bits 8..11 of n_desc. SET_COMM_ALIGN assigns to its
* first argument, which must be an lvalue and is evaluated twice.
*/
#define GET_COMM_ALIGN(n_desc) (((n_desc) >> 8) & 0x0f)
#define SET_COMM_ALIGN(n_desc,align) \
(n_desc) = (((n_desc) & 0xf0ff) | (((align) & 0x0f) << 8))
/*
* To support the lazy binding of undefined symbols in the dynamic link-editor,
* the undefined symbols in the symbol table (the nlist structures) are marked
* with the indication if the undefined reference is a lazy reference or
* non-lazy reference. If both a non-lazy reference and a lazy reference is
* made to the same symbol the non-lazy reference takes precedence. A reference
* is lazy only when all references to that symbol are made through a symbol
* pointer in a lazy symbol pointer section.
*
* The implementation of marking nlist structures in the symbol table for
* undefined symbols will be to use some of the bits of the n_desc field as a
* reference type. The mask REFERENCE_TYPE will be applied to the n_desc field
* of an nlist structure for an undefined symbol to determine the type of
* undefined reference (lazy or non-lazy).
*
* The constants for the REFERENCE FLAGS are propagated to the reference table
* in a shared library file. In that case the constant for a defined symbol,
* REFERENCE_FLAG_DEFINED, is also used.
*/
/* Reference type bits of the n_desc field of undefined symbols */
#define REFERENCE_TYPE 0x7
/* types of references: values of (n_desc & REFERENCE_TYPE) */
#define REFERENCE_FLAG_UNDEFINED_NON_LAZY 0
#define REFERENCE_FLAG_UNDEFINED_LAZY 1
#define REFERENCE_FLAG_DEFINED 2
#define REFERENCE_FLAG_PRIVATE_DEFINED 3
#define REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY 4
#define REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY 5
/*
* To simplify stripping of objects that are used with the dynamic link
* editor, the static link editor marks the symbols defined in an object that
* are referenced by a dynamically bound object (dynamic shared libraries,
* bundles). With this marking strip knows not to strip these symbols.
*/
#define REFERENCED_DYNAMICALLY 0x0010
/*
* For images created by the static link editor with the -twolevel_namespace
* option in effect the flags field of the mach header is marked with
* MH_TWOLEVEL. And the binding of the undefined references of the image are
* determined by the static link editor. Which library an undefined symbol is
* bound to is recorded by the static linker in the high 8 bits of the n_desc
* field using the SET_LIBRARY_ORDINAL macro below. The ordinal recorded
* references the libraries listed in the Mach-O's LC_LOAD_DYLIB load commands
* in the order they appear in the headers. The library ordinals start from 1.
* For a dynamic library that is built as a two-level namespace image the
* undefined references from one module to symbols defined in another use the
* same nlist struct, and in that case SELF_LIBRARY_ORDINAL is used as the
* library ordinal. For
* defined symbols in all images they also must have the library ordinal set to
* SELF_LIBRARY_ORDINAL. The EXECUTABLE_ORDINAL refers to the executable
* image for references from plugins that refer to the executable that loads
* them.
*
* The DYNAMIC_LOOKUP_ORDINAL is for undefined symbols in a two-level namespace
* image that are looked up by the dynamic linker with flat namespace semantics.
* This ordinal was added as a feature in Mac OS X 10.3 by reducing the
* value of MAX_LIBRARY_ORDINAL by one. So it is legal for existing binaries
* or binaries built with older tools to have 0xfe (254) dynamic libraries. In
* this case the ordinal value 0xfe (254) must be treated as a library ordinal
* for compatibility.
*/
/*
 * The library ordinal is the high 8 bits of n_desc.  SET_LIBRARY_ORDINAL
 * assigns to its first argument, which must be an lvalue and is evaluated
 * twice.
 */
#define GET_LIBRARY_ORDINAL(n_desc) (((n_desc) >> 8) & 0xff)
#define SET_LIBRARY_ORDINAL(n_desc,ordinal) \
(n_desc) = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8))
#define SELF_LIBRARY_ORDINAL 0x0
#define MAX_LIBRARY_ORDINAL 0xfd
#define DYNAMIC_LOOKUP_ORDINAL 0xfe
#define EXECUTABLE_ORDINAL 0xff
/*
* The bit 0x0020 of the n_desc field is used for two non-overlapping purposes
* and has two different symbolic names, N_NO_DEAD_STRIP and N_DESC_DISCARDED.
*/
/*
* The N_NO_DEAD_STRIP bit of the n_desc field only ever appears in a
* relocatable .o file (MH_OBJECT filetype). And is used to indicate to the
* static link editor it is never to dead strip the symbol.
*/
#define N_NO_DEAD_STRIP 0x0020 /* symbol is not to be dead stripped */
/*
* The N_DESC_DISCARDED bit of the n_desc field never appears in a linked image.
* But it is used in very rare cases by the dynamic link editor to mark an
* in-memory symbol as discarded and no longer used for linking.
*/
#define N_DESC_DISCARDED 0x0020 /* symbol is discarded */
/*
* The N_WEAK_REF bit of the n_desc field indicates to the dynamic linker that
* the undefined symbol is allowed to be missing and is to have the address of
* zero when missing.
*/
#define N_WEAK_REF 0x0040 /* symbol is weak referenced */
/*
* The N_WEAK_DEF bit of the n_desc field indicates to the static and dynamic
* linkers that the symbol definition is weak, allowing a non-weak symbol to
* also be used which causes the weak definition to be discarded. Currently this
* is only supported for symbols in coalesced sections.
*/
#define N_WEAK_DEF 0x0080 /* coalesed symbol is a weak definition */
/*
* The N_REF_TO_WEAK bit of the n_desc field indicates to the dynamic linker
* that the undefined symbol should be resolved using flat namespace searching.
*/
#define N_REF_TO_WEAK 0x0080 /* reference to a weak symbol */
/*
* The N_ARM_THUMB_DEF bit of the n_desc field indicates that the symbol is
* a definition of a Thumb function.
*/
#define N_ARM_THUMB_DEF 0x0008 /* symbol is a Thumb function (ARM) */
#ifndef __STRICT_BSD__
#if __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
* The function nlist(3) from the C library.
*/
extern int nlist (const char *filename, struct nlist *list);
#if __cplusplus
}
#endif /* __cplusplus */
#endif /* __STRICT_BSD__ */
#endif /* _MACHO_LIST_H_ */

202
data/mach-o/headers/reloc.h Normal file
View File

@ -0,0 +1,202 @@
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/* $NetBSD: exec.h,v 1.6 1994/10/27 04:16:05 cgd Exp $ */
/*
* Copyright (c) 1993 Christopher G. Demetriou
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MACHO_RELOC_H_
#define _MACHO_RELOC_H_
#include <stdint.h>
/*
* Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD
* format. The modifications from the original format were changing the value
* of the r_symbolnum field for "local" (r_extern == 0) relocation entries.
* This modification is required to support symbols in an arbitrary number of
* sections not just the three sections (text, data and bss) in a 4.3BSD file.
* Also the last 4 bits have had the r_type tag added to them.
*/
/*
 * One relocation entry: a 32-bit offset followed by a 32-bit word of
 * bit-fields. The bit-fields are declared one per line; they remain adjacent
 * members of the same type, so they pack into a single 32-bit unit.
 */
struct relocation_info {
    int32_t  r_address;       /* offset in the section to what is being
                                 relocated */
    uint32_t r_symbolnum:24;  /* symbol index if r_extern == 1 or section
                                 ordinal if r_extern == 0 */
    uint32_t r_pcrel:1;       /* was relocated pc relative already */
    uint32_t r_length:2;      /* 0=byte, 1=word, 2=long, 3=quad */
    uint32_t r_extern:1;      /* does not include value of sym referenced */
    uint32_t r_type:4;        /* if not 0, machine specific relocation type */
};
#define R_ABS 0 /* absolute relocation type for Mach-O files */
/*
* The r_address is not really the address as its name indicates but an offset.
* In 4.3BSD a.out objects this offset is from the start of the "segment" for
* which relocation entry is for (text or data). For Mach-O object files it is
* also an offset but from the start of the "section" for which the relocation
* entry is for. See comments in <mach-o/loader.h> about the r_address field
* in images used with the dynamic linker.
*
* In 4.3BSD a.out objects if r_extern is zero then r_symbolnum is an ordinal
* for the segment the symbol being relocated is in. These ordinals are the
* symbol types N_TEXT, N_DATA, N_BSS or N_ABS. In Mach-O object files these
* ordinals refer to the sections in the object file in the order their section
* structures appear in the headers of the object file they are in. The first
* section has the ordinal 1, the second 2, and so on. This means that the
* same ordinal in two different object files could refer to two different
* sections. And further could have still different ordinals when combined
* by the link-editor. The value R_ABS is used for relocation entries for
* absolute symbols which need no further relocation.
*/
/*
* For RISC machines some of the references are split across two instructions
* and the instruction does not contain the complete value of the reference.
* In these cases a second, or paired relocation entry, follows each of these
* relocation entries, using a PAIR r_type, which contains the other part of the
* reference not contained in the instruction. This other part is stored in the
* pair's r_address field. The exact number of bits of the other part of the
* reference store in the r_address field is dependent on the particular
* relocation type for the particular architecture.
*/
/*
* To make scattered loading by the link editor work correctly "local"
* relocation entries can't be used when the item to be relocated is the value
* of a symbol plus an offset (where the resulting expression is outside the
* block the link editor is moving, as blocks are divided at symbol addresses).
* In this case, where the item is a symbol value plus offset, the link editor
* needs to know more than just the section the symbol was defined. What is
* needed is the actual value of the symbol without the offset so it can do the
* relocation correctly based on where the value of the symbol got relocated to
* not the value of the expression (with the offset added to the symbol value).
* So for the NeXT 2.0 release no "local" relocation entries are ever used when
* there is a non-zero offset added to a symbol. The "external" and "local"
* relocation entries remain unchanged.
*
* The implementation is quite messy given the compatibility with the existing
* relocation entry format. The ASSUMPTION is that a section will never be
* bigger than 2**24 - 1 (0x00ffffff or 16,777,215) bytes. This assumption
* allows the r_address (which is really an offset) to fit in 24 bits and high
* bit of the r_address field in the relocation_info structure to indicate
* it is really a scattered_relocation_info structure. Since these are only
* used in places where "local" relocation entries are used and not where
* "external" relocation entries are used the r_extern field has been removed.
*
* For scattered loading to work on a RISC machine where some of the references
* are split across two instructions the link editor needs to be assured that
* each reference has a unique 32 bit reference (that more than one reference is
* NOT sharing the same high 16 bits for example) so it can move each referenced
* item independently of the others. Some compilers guarantee this and others
* don't, so scattered loading can only be done with those that do guarantee
* this.
*/
#if defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__)
/*
* The reason for the ifdefs of __BIG_ENDIAN__ and __LITTLE_ENDIAN__ is that
* when scattered relocation entries were added the mistake was made of using a
* mask against a structure that is made up of bit fields. To make this
* design work this structure must be laid out in memory the same way so the
* mask, when applied, checks the same bit each time (r_scattered).
*/
#endif /* defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__) */
#define R_SCATTERED 0x80000000 /* mask to be applied to the r_address field
of a relocation_info structure to tell that
it is really a scattered_relocation_info
structure */
/*
 * Scattered relocation entry. It is told apart from a plain relocation_info
 * by testing the R_SCATTERED mask (0x80000000) against the first 32-bit word,
 * so r_scattered must land in the most significant bit of that word on every
 * host. Bit-field allocation order depends on the byte order, hence the two
 * mirrored layouts.
 *
 * Byte order is taken from __BIG_ENDIAN__ / __LITTLE_ENDIAN__ when the
 * compiler predefines them, and otherwise from __BYTE_ORDER__. Without the
 * fallback the struct silently had no members on compilers that predefine
 * neither macro.
 */
struct scattered_relocation_info {
#if defined(__BIG_ENDIAN__) || \
    (!defined(__LITTLE_ENDIAN__) && defined(__BYTE_ORDER__) && \
     defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    uint32_t r_scattered:1, /* 1=scattered, 0=non-scattered (see above) */
             r_pcrel:1,     /* was relocated pc relative already */
             r_length:2,    /* 0=byte, 1=word, 2=long, 3=quad */
             r_type:4,      /* if not 0, machine specific relocation type */
             r_address:24;  /* offset in the section to what is being
                               relocated */
    int32_t r_value;        /* the value the item to be relocated is
                               referring to (without any offset added) */
#elif defined(__LITTLE_ENDIAN__) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    uint32_t r_address:24,  /* offset in the section to what is being
                               relocated */
             r_type:4,      /* if not 0, machine specific relocation type */
             r_length:2,    /* 0=byte, 1=word, 2=long, 3=quad */
             r_pcrel:1,     /* was relocated pc relative already */
             r_scattered:1; /* 1=scattered, 0=non-scattered (see above) */
    int32_t r_value;        /* the value the item to be relocated is
                               referring to (without any offset added) */
#endif
};
/*
* Relocation types used in a generic implementation. Relocation entries for
* normal things use the generic relocation as described above and their r_type
* is GENERIC_RELOC_VANILLA (a value of zero).
*
* Another type of generic relocation, GENERIC_RELOC_SECTDIFF, is to support
* the difference of two symbols defined in different sections. That is the
* expression "symbol1 - symbol2 + constant" is a relocatable expression when
* both symbols are defined in some section. For this type of relocation
* both relocation entries are scattered relocation entries. The value of
* symbol1 is stored in the first relocation entry's r_value field and the
* value of symbol2 is stored in the pair's r_value field.
*
* A special case for a prebound lazy pointer is needed to be able to set the
* value of the lazy pointer back to its non-prebound state. This is done
* using the GENERIC_RELOC_PB_LA_PTR r_type. This is a scattered relocation
* entry where the r_value field is the value of the lazy pointer not prebound.
*/
/* r_type values for the generic relocation scheme; values are spelled out
   because they are stored in the 4-bit r_type field of relocation entries. */
enum reloc_type_generic {
    GENERIC_RELOC_VANILLA        = 0, /* generic relocation as described above */
    GENERIC_RELOC_PAIR           = 1, /* Only follows a GENERIC_RELOC_SECTDIFF */
    GENERIC_RELOC_SECTDIFF       = 2,
    GENERIC_RELOC_PB_LA_PTR      = 3, /* prebound lazy pointer */
    GENERIC_RELOC_LOCAL_SECTDIFF = 4
};
#endif /* _MACHO_RELOC_H_ */

122
data/mach-o/headers/stab.h Normal file
View File

@ -0,0 +1,122 @@
/*
* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#ifndef _MACHO_STAB_H_
#define _MACHO_STAB_H_
/* $NetBSD: stab.h,v 1.4 1994/10/26 00:56:25 cgd Exp $ */
/*-
* Copyright (c) 1991 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)stab.h 5.2 (Berkeley) 4/4/91
*/
/*
* This file gives definitions supplementing <nlist.h> for permanent symbol
* table entries of Mach-O files. Modified from the BSD definitions. The
* modifications from the original definitions were changing what the values of
* what was the n_other field (an unused field) which is now the n_sect field.
* These modifications are required to support symbols in an arbitrary number of
* sections not just the three sections (text, data and bss) in a BSD file.
* The values of the defined constants have NOT been changed.
*
* These must have one of the N_STAB bits on. The n_value fields are subject
* to relocation according to the value of their n_sect field. So for types
* that refer to things in sections the n_sect field must be filled in with the
* proper section ordinal. For types that are not to have their n_value field
* relocated the n_sect field must be NO_SECT.
*/
/*
* Symbolic debugger symbols. The comments give the conventional use for
*
* .stabs "n_name", n_type, n_sect, n_desc, n_value
*
* where n_type is the defined constant and not listed in the comment. Other
* fields not listed are zero. n_sect is the section ordinal the entry is
* referring to.
*/
#define N_GSYM 0x20 /* global symbol: name,,NO_SECT,type,0 */
#define N_FNAME 0x22 /* procedure name (f77 kludge): name,,NO_SECT,0,0 */
#define N_FUN 0x24 /* procedure: name,,n_sect,linenumber,address */
#define N_STSYM 0x26 /* static symbol: name,,n_sect,type,address */
#define N_LCSYM 0x28 /* .lcomm symbol: name,,n_sect,type,address */
#define N_BNSYM 0x2e /* begin nsect sym: 0,,n_sect,0,address */
#define N_OPT 0x3c /* emitted with gcc2_compiled and in gcc source */
#define N_RSYM 0x40 /* register sym: name,,NO_SECT,type,register */
#define N_SLINE 0x44 /* src line: 0,,n_sect,linenumber,address */
#define N_ENSYM 0x4e /* end nsect sym: 0,,n_sect,0,address */
#define N_SSYM 0x60 /* structure elt: name,,NO_SECT,type,struct_offset */
#define N_SO 0x64 /* source file name: name,,n_sect,0,address */
#define N_OSO 0x66 /* object file name: name,,0,0,st_mtime */
#define N_LSYM 0x80 /* local sym: name,,NO_SECT,type,offset */
#define N_BINCL 0x82 /* include file beginning: name,,NO_SECT,0,sum */
#define N_SOL 0x84 /* #included file name: name,,n_sect,0,address */
#define N_PARAMS 0x86 /* compiler parameters: name,,NO_SECT,0,0 */
#define N_VERSION 0x88 /* compiler version: name,,NO_SECT,0,0 */
#define N_OLEVEL 0x8A /* compiler -O level: name,,NO_SECT,0,0 */
#define N_PSYM 0xa0 /* parameter: name,,NO_SECT,type,offset */
#define N_EINCL 0xa2 /* include file end: name,,NO_SECT,0,0 */
#define N_ENTRY 0xa4 /* alternate entry: name,,n_sect,linenumber,address */
#define N_LBRAC 0xc0 /* left bracket: 0,,NO_SECT,nesting level,address */
#define N_EXCL 0xc2 /* deleted include file: name,,NO_SECT,0,sum */
#define N_RBRAC 0xe0 /* right bracket: 0,,NO_SECT,nesting level,address */
#define N_BCOMM 0xe2 /* begin common: name,,NO_SECT,0,0 */
#define N_ECOMM 0xe4 /* end common: name,,n_sect,0,0 */
#define N_ECOML 0xe8 /* end common (local name): 0,,n_sect,0,address */
#define N_LENG 0xfe /* second stab entry with length information */
/*
* for the berkeley pascal compiler, pc(1):
*/
#define N_PC 0x30 /* global pascal symbol: name,,NO_SECT,subtype,line */
#endif /* _MACHO_STAB_H_ */

756
data/mach-o/inject.c Normal file
View File

@ -0,0 +1,756 @@
#include "inject.h"
#include "read_dyld_info.h"
#include "headers/loader.h"
#include "headers/nlist.h"
#include "headers/reloc.h"
#include <stddef.h>
addr_t b_allocate_vmaddr(const struct binary *binary) {
addr_t max = 0;
for(uint32_t i = 0; i < binary->nsegments; i++) {
const range_t *range = &binary->segments[i].vm_range;
addr_t newmax = range->start + range->size;
if(newmax > max) max = newmax;
}
return (max + 0xfff) & ~0xfffu;
}
// this function is used by both b_macho_extend_cmds and b_inject_macho_binary
//
// Walks the bind opcode stream in [ptr, ptr + size) and patches it in place:
//  - every BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB gets num_segments added to
//    the segment index stored in its immediate bits;
//  - if kill_dylibs, every BIND_OPCODE_SET_DYLIB_ORDINAL_IMM/_ULEB (including
//    the ULEB operand bytes) is overwritten with the "flat lookup" opcode;
//  - if kill_dones, every BIND_OPCODE_DONE byte is overwritten with
//    BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER.
// Every other opcode is decoded only far enough to step over its operands;
// opcodes not listed in the switch are treated as a single byte.
static void handle_retarded_dyld_info(void *ptr, uint32_t size, int num_segments, bool kill_dylibs, bool kill_dones) {
// seriously, take a look at dyldinfo.cpp from ld64, especially, in this case, the separate handing of different LC_DYLD_INFO sections and the different meaning of BIND_OPCODE_DONE in lazy bind vs the other binds
// not to mention the impossibility of reading this data without knowing every single opcode
// and the lack of nop
// replacement byte for dylib-ordinal opcodes: SET_DYLIB_SPECIAL_IMM with the flat-lookup value in the immediate bits
uint8_t flat_lookup = BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | (((uint8_t) BIND_SPECIAL_DYLIB_FLAT_LOOKUP) & ~BIND_OPCODE_MASK);
// arithmetic on void * (GNU extension) is used throughout this function
void *end = ptr + size;
while(ptr != end) {
// read_int advances ptr past the opcode byte, so ptr - 1 below addresses the opcode itself
uint8_t byte = read_int(&ptr, end, uint8_t);
uint8_t immediate = byte & BIND_IMMEDIATE_MASK;
uint8_t opcode = byte & BIND_OPCODE_MASK;
switch(opcode){
// things we actually care about:
case BIND_OPCODE_DONE:
if(kill_dones) {
// there is no nop opcode, so DONE is replaced by a SET_TYPE_IMM byte instead
*((uint8_t *) ptr - 1) = BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER;
}
break;
case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: {
// update the segment number
uint8_t *p = ptr - 1;
//printf("incr'ing %u by %u\n", (unsigned int) immediate, (unsigned int) num_segments);
// NOTE(review): nothing checks that immediate + num_segments still fits in the immediate bits; a carry would spill into the opcode bits
*p = (*p & BIND_OPCODE_MASK) | (immediate + num_segments);
read_uleb128(&ptr, end);
break;
}
case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
if(kill_dylibs) {
*((uint8_t *) ptr - 1) = flat_lookup;
}
break;
case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: {
void *start = ptr - 1;
read_uleb128(&ptr, end);
if(kill_dylibs) {
// overwrite the opcode and every operand byte, so each byte becomes its own flat-lookup opcode
memset(start, flat_lookup, ptr - start);
}
break;
}
// things we have to get through
case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
// skip the symbol name; strnlen stops AT the terminating NUL without consuming it
ptr += strnlen(ptr, end - ptr);
// unterminated name at the very end of the stream: nothing left to consume
if(ptr == end)
break;
// deliberate fall through: the read_uleb128 below presumably consumes the NUL as a one-byte value - confirm against read_uleb128
case BIND_OPCODE_SET_ADDEND_SLEB: // actually sleb (and I like how read_uleb128 and read_sleb128 in dyldinfo.cpp are completely separate functions), but read_uleb128 should work
case BIND_OPCODE_ADD_ADDR_ULEB:
case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
read_uleb128(&ptr, end);
break;
case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
// two operands: count, then skip
read_uleb128(&ptr, end);
read_uleb128(&ptr, end);
break;
}
}
}
// Make room for `space` more bytes of load commands in `binary`.
//
// If the grown command area ends in the same 4 KiB page as the current one,
// nothing is changed and new_size rounded up to 4 KiB is returned.
//
// Otherwise the file is re-laid-out: stuff_size bytes are inserted at the
// front, the file offsets in the load commands handled below are bumped by
// stuff_size, and the inserted space receives a copy of the mach_header, a new
// "__TEXT" segment command with one empty "__useless" section, and a copy of
// the old load commands. In that case stuff_size - sizeof(struct mach_header)
// is returned.
//
// b_inject_macho_binary uses the return value as the upper bound for
// sizeofcmds when appending commands.
uint32_t b_macho_extend_cmds(struct binary *binary, size_t space) {
size_t old_size = b_mach_hdr(binary)->sizeofcmds;
size_t new_size = old_size + space;
if((new_size >> 12) == (old_size >> 12)) {
// good enough, it'll fit
return (new_size + 0xfff) & ~0xfff;
}
// looks like we need to make a duplicate header and do ugly stuff
// page-rounded size of the new front matter: header + one segment command + one section + all commands
size_t stuff_size = (sizeof(struct mach_header) + sizeof(struct segment_command) + sizeof(struct section) + new_size + 0xfff) & ~0xfff;
// X shifts a file offset by stuff_size, leaving 0 ("not present") untouched
#define X(a) if(a) a += stuff_size;
CMD_ITERATE(b_mach_hdr(binary), cmd) {
switch(cmd->cmd) {
case LC_SEGMENT: {
struct segment_command *seg = (void *) cmd;
// note: fileoff and sect->offset are shifted unconditionally, unlike the X()-guarded fields
seg->fileoff += stuff_size;
struct section *sect = (void *) (seg + 1);
for(uint32_t i = 0; i < seg->nsects; i++, sect++) {
sect->offset += stuff_size;
X(sect->reloff)
}
break;
}
case LC_SYMTAB: {
struct symtab_command *sym = (void *) cmd;
X(sym->symoff)
X(sym->stroff)
break;
}
case LC_DYSYMTAB: {
struct dysymtab_command *dys = (void *) cmd;
X(dys->tocoff)
X(dys->modtaboff)
X(dys->extrefsymoff)
X(dys->indirectsymoff)
X(dys->extreloff)
X(dys->locreloff)
break;
}
case LC_TWOLEVEL_HINTS: {
struct twolevel_hints_command *two = (void *) cmd;
X(two->offset)
break;
}
case LC_CODE_SIGNATURE:
case LC_SEGMENT_SPLIT_INFO:
case 38 /*LC_FUNCTION_STARTS*/: {
// this is sort of a best (but rather bad) guess - all three commands will probably be screwed up by being moved like this
struct linkedit_data_command *dat = (void *) cmd;
X(dat->dataoff)
break;
}
case LC_ENCRYPTION_INFO: {
struct encryption_info_command *enc = (void *) cmd;
X(enc->cryptoff)
break;
}
case LC_DYLD_INFO:
case LC_DYLD_INFO_ONLY: {
struct dyld_info_command *dyl = (void *) cmd;
X(dyl->rebase_off)
X(dyl->export_off)
// Y handles one bind stream: it first bumps every segment index in the stream by 1
// (a new segment command is inserted in front of the existing ones below), using the
// still-unshifted offset to locate the data, and only then shifts the offset itself
#define Y(a) if(dyl->a##_off) { \
prange_t pr = rangeconv_off((range_t) {binary, dyl->a##_off, dyl->a##_size}, MUST_FIND); \
handle_retarded_dyld_info(pr.start, pr.size, 1, false, false); \
dyl->a##_off += stuff_size; \
}
Y(bind)
Y(weak_bind)
Y(lazy_bind)
#undef Y
break;
}
}
}
#undef X
// grow the buffer by stuff_size; the code below reads the old header and commands back from
// offset stuff_size, so pdup presumably places the old contents there - confirm against pdup
binary->valid_range = pdup(binary->valid_range, ((binary->valid_range.size + 0xfff) & ~0xfff) + stuff_size, stuff_size);
// new layout at the front: mach_header | new segment_command | its one section | old load commands
struct mach_header *hdr = binary->valid_range.start;
struct segment_command *seg = (void *) (hdr + 1);
struct section *sect = (void *) (seg + 1);
memcpy(hdr, binary->valid_range.start + stuff_size, sizeof(*hdr));
memcpy(sect + 1, binary->valid_range.start + stuff_size + sizeof(struct mach_header), hdr->sizeofcmds);
// account for the segment command (with its section) that was just inserted
hdr->ncmds++;
hdr->sizeofcmds += sizeof(*seg) + sizeof(*sect);
seg->cmd = LC_SEGMENT;
seg->cmdsize = sizeof(*seg) + sizeof(*sect);
// yes, it MUST be called __TEXT.
static const char segname[16] = "__TEXT";
memcpy(seg->segname, segname, 16);
// the new segment maps the inserted front matter at a fresh address above all existing segments
seg->vmaddr = b_allocate_vmaddr(binary);
seg->vmsize = stuff_size;
seg->fileoff = 0;
seg->filesize = stuff_size;
seg->maxprot = seg->initprot = PROT_READ | PROT_EXEC;
seg->nsects = 1;
seg->flags = 0;
// we need a section to make codesign_allocate happy
static const char sectname[16] = "__useless";
memcpy(sect->sectname, sectname, 16);
memcpy(sect->segname, segname, 16);
// zero-sized section positioned at the very end of the new segment
sect->addr = seg->vmaddr + stuff_size;
sect->size = 0;
sect->offset = stuff_size;
sect->align = 0;
sect->reloff = 0;
sect->nreloc = 0;
sect->flags = 0;
sect->reserved1 = 0;
sect->reserved2 = 0;
return stuff_size - sizeof(struct mach_header);
}
// cctool's checkout.c insists on this exact order
// (indices into linkedit_info.moveme[] and moveref[]; NMOVEME is the count)
enum {
    // dyld info bind streams
    MM_BIND,
    MM_WEAK_BIND,
    MM_LAZY_BIND,
    // local relocations
    MM_LOCREL,
    // the symbol table as a whole, then its three dysymtab groups
    MM_SYMTAB,
    MM_LOCALSYM,
    MM_EXTDEFSYM,
    MM_UNDEFSYM,
    // external relocations
    MM_EXTREL,
    // indirect symbol table
    MM_INDIRECT,
    // string table
    MM_STRTAB,
    NMOVEME
};
// Bookkeeping that catch_linkedit() gathers about one binary's __LINKEDIT
// data. The pointers stored here point INTO that binary's load commands, so
// they are only valid while the header they were taken from stays in place.
struct linkedit_info {
// file offset and size of the __LINKEDIT segment, copied from its LC_SEGMENT command
arange_t linkedit_range;
// pointer to the segment's bytes; set by b_inject_macho_binary via rangeconv_off, not by catch_linkedit
void *linkedit_ptr;
// things we need to move:
// 0. string table
// 1-3. {local, extdef, undef}sym
// 4-5. {locrel, extrel}
// 6. indirect syms
// 7-9. dyld info {, weak_, lazy_}bind
// [hey, I will just assume that nobody has any section relocations because it makes things simpler!]
// things we need to update:
// - symbols reference string table
// - relocations reference symbols
// - indirect syms reference symbols
// - (section data references indirect syms)
// (the numbering above is descriptive only; moveme[] is indexed by the MM_* enum)
struct moveme {
// pointers to the load-command fields holding this table's position and element count
// (e.g. &symtab->stroff / &symtab->strsize); left NULL by the memset if the command is absent
uint32_t *off, *size;
// size in bytes of one element counted by *size
uint32_t element_size;
// catch_linkedit sets this to -1 for MM_SYMTAB and to MM_SYMTAB for the local/extdef/undef
// symbol groups (whose *off is an index such as ilocalsym); every other entry keeps 0.
// NOTE(review): how consumers interpret these values is outside this view - confirm
int off_base;
// not written by any code visible in this chunk; presumably filled in when the tables are copied - confirm
void *copied_to;
void *copied_from;
uint32_t copied_size;
} moveme[NMOVEME];
// the LC_SYMTAB, LC_DYSYMTAB and LC_DYLD_INFO(_ONLY) commands, or NULL if not present
struct symtab_command *symtab;
struct dysymtab_command *dysymtab;
struct dyld_info_command *dyld_info;
};
// Per-table description of the reference embedded in each element, indexed by
// MM_*: `target` is the table the element refers to and `offset` is the byte
// offset of that reference within one element. This matches the "things we
// need to update" list in struct linkedit_info; the code consuming this table
// is outside this view.
// Entries not listed are zero-initialized, i.e. {0 (== MM_BIND), 0}.
// NOTE(review): consumers must tell such entries apart from real references - confirm
static const struct moveref {
int target;
ptrdiff_t offset;
} moveref[NMOVEME] = {
// symbol entries name themselves through an index into the string table
[MM_LOCALSYM] = {MM_STRTAB, offsetof(struct nlist, n_un.n_strx)},
[MM_EXTDEFSYM] = {MM_STRTAB, offsetof(struct nlist, n_un.n_strx)},
[MM_UNDEFSYM] = {MM_STRTAB, offsetof(struct nlist, n_un.n_strx)},
// hooray for little endian
// (offset 4 is the bit-field word of struct relocation_info, which follows the 4-byte r_address;
// presumably r_symbolnum sits in its low 24 bits on little-endian hosts - confirm)
[MM_LOCREL] = {MM_UNDEFSYM, 4},
[MM_EXTREL] = {MM_UNDEFSYM, 4},
// the whole thing is a symbol number
[MM_INDIRECT] = {MM_UNDEFSYM, 0}
};
/*
 * Scan the load commands of `hdr` and record in `li` (which is zeroed first)
 * where the __LINKEDIT segment lives and which load-command fields describe
 * each movable linkedit table (see the MM_* enum and struct moveme).
 *
 * When `patch` is true the header is also modified in place:
 *   - rebase_off/size and export_off/size of LC_DYLD_INFO(_ONLY) are zeroed;
 *   - the __LINKEDIT LC_SEGMENT command, LC_CODE_SIGNATURE,
 *     LC_SEGMENT_SPLIT_INFO and command 38 (LC_FUNCTION_STARTS) are removed,
 *     with ncmds and sizeofcmds adjusted.
 *
 * Returns true iff a __LINKEDIT segment command was found. Dies if the
 * binary has no LC_SYMTAB or no LC_DYSYMTAB.
 *
 * The pointers stored in `li` point into the load commands of `hdr`.
 */
static bool catch_linkedit(struct mach_header *hdr, struct linkedit_info *li, bool patch) {
    memset(li, 0, sizeof(*li));
    bool ret = false;
    CMD_ITERATE(hdr, cmd) {
    restart:
        switch(cmd->cmd) {
        case LC_SEGMENT: {
            struct segment_command *seg = (void *) cmd;
            if(!strcmp(seg->segname, "__LINKEDIT")) {
                li->linkedit_range.start = seg->fileoff;
                li->linkedit_range.size = seg->filesize;
                ret = true;
                // the segment command itself gets dropped when patching
                goto patchout;
            }
            break;
        }
        case LC_SYMTAB: {
            struct symtab_command *symtab = (void *) cmd;
            li->symtab = symtab;
            li->moveme[MM_STRTAB].off = &symtab->stroff;
            li->moveme[MM_STRTAB].size = &symtab->strsize;
            li->moveme[MM_STRTAB].element_size = 1;
            li->moveme[MM_SYMTAB].off = &symtab->symoff;
            li->moveme[MM_SYMTAB].size = &symtab->nsyms;
            li->moveme[MM_SYMTAB].element_size = sizeof(struct nlist);
            li->moveme[MM_SYMTAB].off_base = -1;
            break;
        }
        case LC_DYSYMTAB: {
            struct dysymtab_command *dys = (void *) cmd;
            li->dysymtab = dys;
            // the three symbol groups are index ranges within the symbol table
            li->moveme[MM_LOCALSYM].off = &dys->ilocalsym;
            li->moveme[MM_LOCALSYM].size = &dys->nlocalsym;
            li->moveme[MM_LOCALSYM].element_size = sizeof(struct nlist);
            li->moveme[MM_LOCALSYM].off_base = MM_SYMTAB;
            li->moveme[MM_EXTDEFSYM].off = &dys->iextdefsym;
            li->moveme[MM_EXTDEFSYM].size = &dys->nextdefsym;
            li->moveme[MM_EXTDEFSYM].element_size = sizeof(struct nlist);
            li->moveme[MM_EXTDEFSYM].off_base = MM_SYMTAB;
            li->moveme[MM_UNDEFSYM].off = &dys->iundefsym;
            li->moveme[MM_UNDEFSYM].size = &dys->nundefsym;
            li->moveme[MM_UNDEFSYM].element_size = sizeof(struct nlist);
            li->moveme[MM_UNDEFSYM].off_base = MM_SYMTAB;
            li->moveme[MM_LOCREL].off = &dys->locreloff;
            li->moveme[MM_LOCREL].size = &dys->nlocrel;
            li->moveme[MM_LOCREL].element_size = sizeof(struct relocation_info);
            li->moveme[MM_EXTREL].off = &dys->extreloff;
            li->moveme[MM_EXTREL].size = &dys->nextrel;
            li->moveme[MM_EXTREL].element_size = sizeof(struct relocation_info);
            li->moveme[MM_INDIRECT].off = &dys->indirectsymoff;
            li->moveme[MM_INDIRECT].size = &dys->nindirectsyms;
            li->moveme[MM_INDIRECT].element_size = 4;
            break;
        }
        case LC_DYLD_INFO_ONLY:
        case LC_DYLD_INFO: {
            struct dyld_info_command *di = (void *) cmd;
            li->dyld_info = di;
            if(patch) {
                di->rebase_off = 0;
                di->rebase_size = 0;
                di->export_off = 0;
                di->export_size = 0;
            }
            li->moveme[MM_BIND].off = &di->bind_off;
            li->moveme[MM_BIND].size = &di->bind_size;
            li->moveme[MM_BIND].element_size = 1;
            li->moveme[MM_WEAK_BIND].off = &di->weak_bind_off;
            li->moveme[MM_WEAK_BIND].size = &di->weak_bind_size;
            li->moveme[MM_WEAK_BIND].element_size = 1;
            li->moveme[MM_LAZY_BIND].off = &di->lazy_bind_off;
            li->moveme[MM_LAZY_BIND].size = &di->lazy_bind_size;
            li->moveme[MM_LAZY_BIND].element_size = 1;
            break;
        }
        patchout:
        case LC_CODE_SIGNATURE:
        case LC_SEGMENT_SPLIT_INFO:
        case 38 /*LC_FUNCTION_STARTS*/:
            // hope you didn't need that stuff <3
            if(patch) {
                // cut this command out by sliding the following ones down
                hdr->sizeofcmds -= cmd->cmdsize;
                size_t copysize = hdr->sizeofcmds - ((char *) cmd - (char *) (hdr + 1));
                hdr->ncmds--;
                // source and destination overlap whenever the tail is longer
                // than the removed command, so this has to be memmove
                memmove(cmd, (char *) cmd + cmd->cmdsize, copysize);
                // update this thing from the CMD_ITERATE macro
                end = (void *) (hdr + 1) + hdr->sizeofcmds;
                // don't run off the end
                if(!copysize) goto end;
                // cmd now addresses the next command; examine it without advancing
                goto restart;
            }
            break;
        }
    }
end:
    // we want both binaries to have a symtab and dysymtab, makes things easier
    if(!li->symtab || !li->dysymtab) die("symtab/dysymtab missing");
    return ret;
}
/*
 * Add `incr` to one 32-bit word in every entry of a __stub_helper section.
 *
 *   cputype  selects the entry layout (CPU_TYPE_ARM or CPU_TYPE_X86); any
 *            other value dies.
 *   base     start of the section data, `size` bytes long. Nothing is done
 *            when size is 0.
 *   incr     amount added to each patched word; the caller passes the size of
 *            the injected binary's lazy bind info.
 *
 * skip_begin bytes at the start of the section are skipped and the last
 * skip_end bytes are left alone; in between, entries are `stride` bytes apart
 * and the patched word sits `offset` bytes into each.
 */
static void fixup_stub_helpers(int cputype, void *base, size_t size, uint32_t incr) {
    if(!size) return;
    size_t skip_begin, skip_end, offset, stride;
    switch(cputype) {
    case CPU_TYPE_ARM:
        skip_begin = 0x24;
        skip_end = 0;
        offset = 8;
        stride = 0xc;
        break;
    case CPU_TYPE_X86:
        skip_begin = 0;
        skip_end = 0xa;
        offset = 1;
        stride = 0xa;
        break;
    default:
        die("stub_helpers, but unknown cpu type");
    }
    if(size < (skip_begin + skip_end)) {
        die("unknown stub_helpers format (too small)");
    }
    uint8_t *entry = (uint8_t *) base + skip_begin;
    size -= skip_begin;
    while(size >= skip_end + stride) {
        // The word is not 4-byte aligned in the x86 layout (offset 1), so go
        // through memcpy instead of dereferencing a cast uint32_t pointer.
        uint32_t value;
        memcpy(&value, entry + offset, sizeof(value));
        value += incr;
        memcpy(entry + offset, &value, sizeof(value));
        entry += stride;
        size -= stride;
    }
}
/*
 * Inject the Mach-O image `binary` into `target`.
 *
 * Each LC_SEGMENT of `binary` is appended to target's load commands and its
 * file contents are queued to be copied, page-aligned, past the current end of
 * target's file data.  The copies are performed at the very end, after
 * target->valid_range has been grown with pdup().
 *
 * target        - binary that is modified in place (header, load commands,
 *                 valid_range).
 * binary        - image whose segments (and, in userland mode, LC_LOAD_DYLIB
 *                 commands) are injected.
 * find_hack_func - only used when not in userland mode and `binary` contains
 *                 S_MOD_INIT_FUNC_POINTERS entries: it is called with `target`
 *                 and must return the (Thumb, low bit set) address of a
 *                 function whose first 12 bytes are replaced by a jump to a
 *                 generated stub that runs the constructors.  die()s if NULL
 *                 in that situation.
 * userland      - if true, __LINKEDIT of `binary` is not copied as a segment;
 *                 instead a merged __LINKEDIT is built from both images and
 *                 symbol/indirect/dyld-info references are fixed up.
 *
 * NOTE(review): the fixed-size tables below (100 entries each) silently drop
 * entries once full; confirm that inputs can never exceed them.
 */
void b_inject_macho_binary(struct binary *target, const struct binary *binary, addr_t (*find_hack_func)(const struct binary *binary), bool userland) {
// Reserve `size` bytes at the end of target's load commands (die()s if that
// would exceed sizeofcmds_limit) and evaluate to a pointer to the new space.
#define ADD_COMMAND(size) ({ \
    void *ret = (char *) hdr + sizeof(struct mach_header) + hdr->sizeofcmds; \
    uint32_t newsize = hdr->sizeofcmds + (size); \
    if(newsize > sizeofcmds_limit) { \
        die("not enough space for commands"); \
    } \
    hdr->ncmds++; \
    hdr->sizeofcmds += (uint32_t) (size); \
    ret; \
})
// Allocate `size` bytes of file space at the next 0x1000-aligned offset.
#define ADD_SEGMENT(size) ({ \
    uint32_t ret = (seg_off + 0xfff) & ~0xfff; \
    seg_off = ret + (size); \
    ret; \
})
// Allocate `size` bytes of address space at the next 0x1000-aligned address.
#define ADD_SEGMENT_ADDR(size) ({ \
    uint32_t ret = (seg_addr + 0xfff) & ~0xfff; \
    seg_addr = ret + (size); \
    ret; \
})
    // the 0x100 is arbitrary, but intended to please codesign_allocate
    uint32_t sizeofcmds_limit = b_macho_extend_cmds(target, b_mach_hdr(binary)->sizeofcmds + 0x100);
    size_t seg_off = target->valid_range.size;
    addr_t seg_addr = 0;
    struct mach_header *hdr = b_mach_hdr(target);
    hdr->flags &= ~MH_PIE;
    const struct binary *binaries[] = {binary, target};
    // in userland mode, we cut off the LINKEDIT segment (for target, only if it's at the end of the binary)
    struct linkedit_info li[2];
    if(userland) {
        for(int i = 0; i < 2; i++) {
            if(catch_linkedit(b_mach_hdr(binaries[i]), &li[i], i == 1)) {
                li[i].linkedit_ptr = rangeconv_off((range_t) {binaries[i], li[i].linkedit_range.start, li[i].linkedit_range.size}, MUST_FIND).start;
            }
        }
        if((size_t) (li[1].linkedit_range.start + li[1].linkedit_range.size) == seg_off) {
            target->valid_range.size = seg_off = li[1].linkedit_range.start;
        }
        if((li[0].dyld_info != 0) != (li[1].dyld_info != 0)) {
            die("LC_DYLD_INFO(_ONLY) should be in both or neither");
        }
    }
    uint32_t init_ptrs[100];
    unsigned num_init_ptrs = 0;
    uint32_t *reserved1s[100];
    unsigned num_reserved1s = 0;
    // pending (file offset in target, source pointer, size) copies, done at the end
    struct copy { ptrdiff_t off; void *start; size_t size; } copies[100];
    unsigned num_copies = 0;
    unsigned num_segments = 0;
    if(userland) {
        // Walk target's existing segments: count them, remember the reserved1
        // fields of sections that index the indirect symbol table, and fix up
        // __stub_helper when dyld info is present.
        CMD_ITERATE(hdr, cmd) {
            if(cmd->cmd == LC_SEGMENT) {
                num_segments++;
                struct segment_command *seg = (void *) cmd;
                struct section *sections = (void *) (seg + 1);
                for(uint32_t i = 0; i < seg->nsects; i++) {
                    struct section *sect = &sections[i];
                    switch(sect->flags & SECTION_TYPE) {
                    case S_NON_LAZY_SYMBOL_POINTERS:
                    case S_LAZY_SYMBOL_POINTERS:
                    case S_SYMBOL_STUBS:
                        if(num_reserved1s < 100) reserved1s[num_reserved1s++] = &sect->reserved1;
                        break;
                    }
                    if(li[0].dyld_info && !strcmp(sect->sectname, "__stub_helper")) {
                        void *segdata = rangeconv_off((range_t) {target, seg->fileoff, seg->filesize}, MUST_FIND).start;
                        fixup_stub_helpers(hdr->cputype, segdata + sect->offset - seg->fileoff, sect->size, *li[0].moveme[MM_LAZY_BIND].size);
                    }
                }
            }
        }
    }
    // Append the load commands of `binary` to target.
    CMD_ITERATE(b_mach_hdr(binary), cmd) {
        switch(cmd->cmd) {
        case LC_SEGMENT: {
            struct segment_command *seg = (void *) cmd;
            if(userland && !strcmp(seg->segname, "__LINKEDIT")) continue;
            size_t size = sizeof(struct segment_command) + seg->nsects * sizeof(struct section);
            // make seg_addr useful
            addr_t new_addr = seg->vmaddr + seg->vmsize;
            if(new_addr > seg_addr) seg_addr = new_addr;
            struct segment_command *newseg = ADD_COMMAND(size);
            memcpy(newseg, seg, size);
            prange_t pr = rangeconv_off((range_t) {binary, seg->fileoff, seg->filesize}, MUST_FIND);
            newseg->fileoff = (uint32_t) ADD_SEGMENT(pr.size);
            //printf("setting fileoff to %u\n", newseg->fileoff);
            if(num_copies < 100) copies[num_copies++] = (struct copy) {newseg->fileoff, pr.start, pr.size};
            struct section *sections = (void *) (newseg + 1);
            for(uint32_t i = 0; i < seg->nsects; i++) {
                struct section *sect = &sections[i];
                // `sect` lives in the copied command; remember where the data
                // sits in the source image before retargeting the offset.
                uint32_t orig_offset = sect->offset;
                sect->offset = newseg->fileoff + sect->addr - newseg->vmaddr;
                // ZEROFILL is okay because iBoot always zeroes vmsize - filesize
                if(!userland && (sect->flags & SECTION_TYPE) == S_MOD_INIT_FUNC_POINTERS) {
                    // read the constructor pointers from the source image at
                    // the section's original file offset
                    uint32_t *p = rangeconv_off((range_t) {binary, orig_offset, sect->size}, MUST_FIND).start;
                    size_t num = sect->size / 4;
                    while(num--) {
                        if(num_init_ptrs < 100) init_ptrs[num_init_ptrs++] = *p++;
                    }
                }
            }
            break;
        }
        case LC_LOAD_DYLIB:
            if(userland) {
                void *newcmd = ADD_COMMAND(cmd->cmdsize);
                memcpy(newcmd, cmd, cmd->cmdsize);
            }
            break;
        }
    }
    // now deal with the init pointers (if not userland)
    // this code is really gross
    if(num_init_ptrs > 0) {
        if(num_init_ptrs == 1) { // hey, correct plurals are nice
            fprintf(stderr, "note: 1 constructor function is present; using the hack_func\n");
        } else {
            fprintf(stderr, "note: %d constructor functions are present; using the hack_func\n", num_init_ptrs);
        }
        if(!find_hack_func) {
            die("...but there was no find_hack_func");
        }
        // ldr pc, [pc]
        uint16_t part0[] = {0xf8df, 0xf000};
        // push {r0-r3, lr}; adr lr, f+1; ldr pc, a; f: b next; a: .long 0; next:
        // (the address of the init func)
        //
        uint16_t part1[] = {0xb50f, 0xf20f, 0x0e07, 0xf8df, 0xf004, 0xe001};
        // (bytes_to_move bytes of stuff)
        // pop {r0-r3, lr}
        static const uint16_t part2[] = {0xe8bd, 0x400f};
        // ldr pc, [pc]
        static const uint16_t part3[] = {0xf8df, 0xf000};
        uint32_t bytes_to_move = 12; // don't cut the MRC in two!
        addr_t hack_func = find_hack_func(target);
        fprintf(stderr, "hack_func = %08llx\n", (long long) hack_func);
        prange_t hack_func_pr = rangeconv((range_t) {target, hack_func & ~1, bytes_to_move}, MUST_FIND);
        // allocate a new segment for the stub
        uint32_t stub_size = (uint32_t) ((sizeof(part1) + 4) * num_init_ptrs + sizeof(part2) + bytes_to_move + sizeof(part3) + 4);
        if(!(hack_func & 1)) {
            die("hack func 0x%llx is not thumb", (uint64_t) hack_func);
        }
        struct segment_command *newseg = ADD_COMMAND(sizeof(struct segment_command));
        newseg->cmd = LC_SEGMENT;
        newseg->cmdsize = sizeof(struct segment_command);
        memset(newseg->segname, 0, 16);
        strcpy(newseg->segname, "__CRAP");
        newseg->vmaddr = ADD_SEGMENT_ADDR(stub_size);
        newseg->vmsize = stub_size;
        newseg->fileoff = ADD_SEGMENT(stub_size);
        newseg->filesize = stub_size;
        newseg->maxprot = newseg->initprot = PROT_READ | PROT_EXEC;
        newseg->nsects = 0;
        newseg->flags = 0;
        // Keep the base of the stub buffer: `ptr` is a write cursor and ends up
        // one past the end, so it must not be what gets queued for copying.
        void *stub_start = malloc(stub_size);
        void *ptr = stub_start;
        for(unsigned i = 0; i < num_init_ptrs; i++) {
            memcpy(ptr, part1, sizeof(part1));
            ptr += sizeof(part1);
            memcpy(ptr, &init_ptrs[i], 4);
            ptr += 4;
            // only the first trampoline pushes the registers; later ones get 0x46c0 instead
            part1[0] = 0x46c0;
        }
        memcpy(ptr, part2, sizeof(part2));
        ptr += sizeof(part2);
        // relocate the first bytes_to_move bytes of the hack func into the stub...
        memcpy(ptr, hack_func_pr.start, bytes_to_move);
        ptr += bytes_to_move;
        memcpy(ptr, part3, sizeof(part3));
        ptr += sizeof(part3);
        // ...followed by a jump back to the remainder of the hack func
        uint32_t new_addr = hack_func + bytes_to_move;
        memcpy(ptr, &new_addr, 4);
        ptr += 4;
        // overwrite the start of the hack func with a jump to the stub
        new_addr = newseg->vmaddr | 1;
        memcpy(hack_func_pr.start, part0, sizeof(part0));
        memcpy(hack_func_pr.start + sizeof(part0), &new_addr, 4);
        if(num_copies < 100) copies[num_copies++] = (struct copy) {newseg->fileoff, stub_start, stub_size};
    }
    autofree char *linkedit = NULL;
    if(userland) {
        // build the new LINKEDIT
        uint32_t newsize = 0;
        for(int i = 0; i < NMOVEME; i++) {
            for(int l = 0; l < 2; l++) {
                struct moveme *m = &li[l].moveme[i];
                if(!m->size) {
                    // absent table: point size/off at a shared zero so the code below needs no special case
                    static uint32_t zero = 0;
                    m->size = m->off = &zero;
                    m->element_size = 1;
                }
                if(m->off_base != -1) {
                    newsize += *m->size * m->element_size;
                }
            }
        }
        if(newsize != 0) {
            uint32_t linkedit_off = ADD_SEGMENT(newsize);
            linkedit = malloc(newsize);
            uint32_t off = 0;
            for(int i = 0; i < NMOVEME; i++) {
                uint32_t s = 0;
                for(int l = 0; l < 2; l++) {
                    struct moveme *m = &li[l].moveme[i];
                    m->copied_size = *m->size * m->element_size;
                    m->copied_to = linkedit + off + s;
                    if(m->off_base > 0) {
                        // the value is an index into a table represented by another moveme (i.e. the symtab)
                        m->copied_from = li[l].moveme[m->off_base].copied_from + *m->off * m->element_size;
                    } else {
                        // the value is a file offset
                        // if 0, just plain copy; if -1, the references will handle copying
                        m->copied_from = li[l].linkedit_ptr - li[l].linkedit_range.start + *m->off;
                    }
                    if(m->off_base != -1) {
                        memcpy(m->copied_to, m->copied_from, m->copied_size);
                    }
                    s += m->copied_size;
                }
                //printf("i=%d s=%u off=%u\n", i, s, off);
                // update the one to load
                struct moveme *m = &li[1].moveme[i];
                *m->off = linkedit_off + off;
                if(m->off_base > 0) {
                    *m->off = (*m->off - *li[1].moveme[m->off_base].off) / m->element_size;
                }
                *m->size = s / m->element_size;
                if(m->off_base != -1) {
                    off += s;
                }
            }
            // update struct references (which are out of order, yay)
            off = 0;
            for(int i = 0; i < 2; i++) {
                for(int j = MM_LOCREL; j <= MM_INDIRECT; j++) {
                    int k = moveref[j].target;
                    if(!k) continue;
                    struct moveme *m = &li[i].moveme[j];
                    for(void *ptr = m->copied_to; ptr < m->copied_to + m->copied_size; ptr += m->element_size) {
                        uint32_t diff = 0;
                        int b = li[i].moveme[k].off_base;
                        if(b > 0) {
                            // A1 A2 B1 B2 C1 C2
                            // 0: <--------->
                            // 1: <------------>
                            int orig_off = (li[i].moveme[k].copied_from - li[i].moveme[b].copied_from) / li[i].moveme[k].element_size;
                            int new_off = (li[i].moveme[k].copied_to - li[0].moveme[b].copied_to) / li[i].moveme[k].element_size;
                            diff = new_off - orig_off;
                        } else {
                            // A B
                            // 0:
                            // 1: <->
                            if(i == 1) {
                                diff = li[0].moveme[k].copied_size / li[0].moveme[k].element_size;
                            }
                        }
                        uint32_t *p = ptr + moveref[j].offset;
                        if(*p < 0x10000000) *p += diff;
                    }
                }
            }
            // update library numbers in symbol table
            {
                struct moveme *restrict m = &li[0].moveme[MM_UNDEFSYM];
                for(struct nlist *nl = m->copied_to; (void *) (nl + 1) <= (m->copied_to + m->copied_size); nl++) {
                    unsigned lib = GET_LIBRARY_ORDINAL(nl->n_desc);
                    if(lib != SELF_LIBRARY_ORDINAL && lib <= MAX_LIBRARY_ORDINAL) {
                        SET_LIBRARY_ORDINAL(nl->n_desc, DYNAMIC_LOOKUP_ORDINAL);
                    }
                }
            }
            // ... and update section references
            for(unsigned i = 0; i < num_reserved1s; i++) {
                *reserved1s[i] += *li[0].moveme[MM_INDIRECT].size;
            }
            // ... and dyld info
            if(li->dyld_info) {
                for(int i = MM_BIND; i <= MM_LAZY_BIND; i++) {
                    if(*li[1].moveme[i].off) {
                        handle_retarded_dyld_info(linkedit - linkedit_off + *li[1].moveme[i].off, *li[0].moveme[i].size, num_segments, true, i != MM_LAZY_BIND);
                    }
                }
            }
            struct segment_command *newseg = ADD_COMMAND(sizeof(struct segment_command));
            newseg->cmd = LC_SEGMENT;
            newseg->cmdsize = sizeof(struct segment_command);
            memset(newseg->segname, 0, 16);
            strcpy(newseg->segname, "__LINKEDIT");
            newseg->vmaddr = ADD_SEGMENT_ADDR(newsize);
            newseg->vmsize = (newsize + 0xfff) & ~0xfff;
            newseg->fileoff = linkedit_off;
            newseg->filesize = newsize;
            newseg->maxprot = newseg->initprot = PROT_READ | PROT_WRITE;
            newseg->nsects = 0;
            newseg->flags = 0;
            //printf("off=%d newsize=%d\n", linkedit_off, newsize);
            if(num_copies < 100) copies[num_copies++] = (struct copy) {linkedit_off, linkedit, newsize};
        }
    }
    // finally, expand the binary in memory and actually copy in the new stuff
    target->valid_range = pdup(target->valid_range, seg_off, 0);
    for(unsigned i = 0; i < num_copies; i++) {
        memcpy(target->valid_range.start + copies[i].off, copies[i].start, copies[i].size);
    }
}

10
data/mach-o/inject.h Normal file
View File

@ -0,0 +1,10 @@
#pragma once
#include "binary.h"
addr_t b_allocate_vmaddr(const struct binary *binary);
// these two functions will modify binary->valid_range and trash everything else.
uint32_t b_macho_extend_cmds(struct binary *binary, size_t space);
// this function works for both kernel and userland binaries. for userland, pass NULL for find_hack_func.
void b_inject_macho_binary(struct binary *target, const struct binary *inject, addr_t (*find_hack_func)(const struct binary *binary), bool userland);

466
data/mach-o/link.c Normal file
View File

@ -0,0 +1,466 @@
#include "link.h"
#include "headers/loader.h"
#include "headers/nlist.h"
#include "headers/reloc.h"
#include "headers/arm_reloc.h"
#include <ctype.h>
#include "read_dyld_info.h"
/*
 * Resolve `name` through the caller-supplied lookup callback and apply the
 * fallback policy when the callback reports a miss (returns 0):
 *   - userland:               the miss is passed through (0 is returned);
 *   - "dyld_stub_binder":     the placeholder 0xdeadbeef is returned;
 *   - weak:                   a warning goes to stderr, 0 is returned;
 *   - anything else:          die().
 */
static addr_t lookup_symbol_or_do_stuff(lookupsym_t lookup_sym, void *context, const char *name, bool weak, bool userland) {
    addr_t resolved = lookup_sym(context, name);
    if(resolved || userland) {
        // found, or userland mode where misses are let through
        return resolved;
    }
    if(strcmp(name, "dyld_stub_binder") == 0) {
        return 0xdeadbeef;
    }
    if(weak) {
        fprintf(stderr, "lookup_nth_symbol: warning: couldn't find weak symbol %s\n", name);
    } else {
        die("couldn't find symbol %s\n", name);
    }
    return resolved;
}
/*
 * Look up the address of symbol table entry number `symbolnum` of `load`.
 * The entry's name is taken from load's string table and its N_WEAK_REF flag
 * decides whether a failed lookup is tolerated (see lookup_symbol_or_do_stuff).
 */
static addr_t lookup_nth_symbol(const struct binary *load, uint32_t symbolnum, lookupsym_t lookup_sym, void *context, bool userland) {
    struct nlist *entry = b_macho_nth_symbol(load, symbolnum);
    const char *symbol_name = load->mach->strtab + entry->n_un.n_strx;
    bool is_weak = (entry->n_desc & N_WEAK_REF) != 0;
    return lookup_symbol_or_do_stuff(lookup_sym, context, symbol_name, is_weak, userland);
}
/*
 * Apply one table of `nreloc` relocation_info entries located at file offset
 * `reloff` in `load`.
 *
 * External entries are resolved through lookup_sym (skipped in
 * RELOC_LOCAL_ONLY); local entries are adjusted by `slide` (skipped in
 * RELOC_EXTERN_ONLY and RELOC_USERLAND).  An entry that has been applied is
 * neutralised in place (r_address = 0, r_symbolnum = R_ABS) so that it is
 * skipped if the table is processed again.  Only 4-byte (r_length == 2)
 * ARM_RELOC_VANILLA and ARM_RELOC_BR24 relocations are supported; anything
 * else die()s.
 */
static void relocate_area(struct binary *load, uint32_t reloff, uint32_t nreloc, enum reloc_mode mode, lookupsym_t lookup_sym, void *context, addr_t slide) {
    struct relocation_info *things = rangeconv_off((range_t) {load, reloff, nreloc * sizeof(struct relocation_info)}, MUST_FIND).start;
    for(uint32_t i = 0; i < nreloc; i++) {
        if(things[i].r_length != 2) {
            die("bad relocation length");
        }
        addr_t address = things[i].r_address;
        // already-applied (or absolute) entries are left alone
        if(address == 0 || things[i].r_symbolnum == R_ABS) continue;
        address += b_macho_reloc_base(load);
        uint32_t *p = rangeconv((range_t) {load, address, 4}, MUST_FIND).start;
        addr_t value;
        if(things[i].r_extern) {
            if(mode == RELOC_LOCAL_ONLY) continue;
            value = lookup_nth_symbol(load, things[i].r_symbolnum, lookup_sym, context, mode == RELOC_USERLAND);
            // unresolved in userland mode: keep the entry for the real loader
            if(value == 0 && mode == RELOC_USERLAND) continue;
        } else {
            if(mode == RELOC_EXTERN_ONLY || mode == RELOC_USERLAND) continue;
            // *shrug*
            value = slide;
        }
        things[i].r_address = 0;
        things[i].r_symbolnum = R_ABS;
        if(mode == RELOC_EXTERN_ONLY && things[i].r_type != ARM_RELOC_VANILLA) {
            die("non-VANILLA relocation but we are relocating without knowing the slide; use __attribute__((long_call)) to get rid of these");
        }
        switch(things[i].r_type) {
        case ARM_RELOC_VANILLA:
            //printf("%x, %x += %x\n", address, *p, value);
            if(rangeconv((range_t) {load, *p, 0}, 0).start) {
                // when dyld_stub_binding_helper (which would just crash, btw) is present, entries in the indirect section point to it; usually this increments to point to the right dyld_stub_binding_helper, then that's clobbered by the indirect code. when we do prelinking, the indirect code runs first and we would be relocating the already-correctly-located importee symbol, so we add this check (easier than actually checking that it's not in the indirect section) to make sure we're not relocating nonsense.
                *p += value;
            }
            //else printf("skipping %x\n", *p);
            break;
        case ARM_RELOC_BR24: {
            if(!things[i].r_pcrel) die("weird relocation");
            uint32_t ins = *p;
            // sign-extend the 24-bit word displacement and convert it to bytes
            uint32_t off = ins & 0x00ffffff;
            if(ins & 0x00800000) off |= 0xff000000;
            off <<= 2;
            off += (value - slide);
            if((off & 0xfc000000) != 0 &&
               (off & 0xfc000000) != 0xfc000000) {
                die("BR24 relocation out of range");
            }
            uint32_t cond = ins >> 28;
            if(value & 1) {
                // Thumb target: encode as BLX (1111 101H imm24).  H is bit 24
                // and carries bit 1 of the byte displacement.
                if(cond != 0xe && cond != 0xf) die("can't convert BL with condition to BLX (which must be unconditional)");
                ins = (ins & 0x0effffff) | 0xf0000000 | ((off & 2) << 23);
            } else if(cond == 0xf) {
                // ARM target but the instruction is BLX: turn it into an
                // unconditional BL (1110 1011 imm24).  Bit 24 must be set
                // explicitly; it held the H bit before, not the link bit.
                ins = (ins & 0x00ffffff) | 0xeb000000;
            }
            ins = (ins & 0xff000000) | ((off >> 2) & 0x00ffffff);
            *p = ins;
            break;
        }
        default:
            die("unknown relocation type %d", things[i].r_type);
        }
    }
}
// Process one section's indirect symbol pointers.
//
// For S_NON_LAZY_SYMBOL_POINTERS / S_LAZY_SYMBOL_POINTERS sections, each slot
// in the section data [offset, offset + size) is paired with an entry of the
// indirect symbol table starting at index reserved1.  Local slots get `slide`
// added, external slots are overwritten with the looked-up symbol address, and
// every slot that was written has its indirect table entry replaced by
// INDIRECT_SYMBOL_ABS.  A fixed list of other section types is ignored; any
// remaining type die()s unless mode is RELOC_USERLAND.
static void go_indirect(struct binary *load, uint32_t offset, uint32_t size, uint32_t flags, uint32_t reserved1, uint32_t reserved2, enum reloc_mode mode, lookupsym_t lookup_sym, void *context, addr_t slide) {
uint8_t type = flags & SECTION_TYPE;
uint8_t pointer_size = b_pointer_size(load);
switch(type) {
case S_NON_LAZY_SYMBOL_POINTERS:
case S_LAZY_SYMBOL_POINTERS: {
uint32_t indirect_table_offset = reserved1;
const struct dysymtab_command *dysymtab = load->mach->dysymtab;
// S_SYMBOL_STUBS is not one of the enclosing case labels, so within this
// block stride is always pointer_size and reserved2 is not used.
uint32_t stride = type == S_SYMBOL_STUBS ? reserved2 : pointer_size;
uint32_t num_syms = size / stride;
// Sanity checks: slots must be at least pointer-sized and tile the section
// exactly, and [indirect_table_offset, +num_syms) must lie inside the indirect
// table.  NOTE(review): the third condition appears to reject tables whose
// byte extent (indirectsymoff + nindirectsyms * 4) would wrap addr_t - confirm.
if(stride < pointer_size ||
num_syms * stride != size ||
dysymtab->nindirectsyms > ((addr_t) -(dysymtab->indirectsymoff - 1)) / sizeof(uint32_t) ||
indirect_table_offset > dysymtab->nindirectsyms ||
num_syms > dysymtab->nindirectsyms - indirect_table_offset) {
die("bad indirect section");
}
uint32_t *indirect_syms = rangeconv_off((range_t) {load, (addr_t) dysymtab->indirectsymoff + indirect_table_offset * sizeof(uint32_t), num_syms * sizeof(uint32_t)}, MUST_FIND).start;
void *addrs = rangeconv_off((range_t) {load, offset, size}, MUST_FIND).start;
// `continue` below advances to the next slot without touching this one.
for(uint32_t i = 0; i < num_syms; i++, indirect_syms++, addrs += stride) {
addr_t addr, found_addr;
switch(*indirect_syms) {
case INDIRECT_SYMBOL_LOCAL:
if(mode == RELOC_EXTERN_ONLY || mode == RELOC_USERLAND) continue;
addr = read_pointer(addrs, pointer_size) + slide;
break;
case INDIRECT_SYMBOL_ABS:
continue;
default:
// any other value is used as a symbol table index
if(mode == RELOC_LOCAL_ONLY) continue;
found_addr = lookup_nth_symbol(load, *indirect_syms, lookup_sym, context, mode == RELOC_USERLAND);
if(!found_addr && mode == RELOC_USERLAND) {
// don't set to ABS!
continue;
}
addr = found_addr;
break;
}
write_pointer(addrs, addr, pointer_size);
// mark the slot as done so that a later pass skips it
*indirect_syms = INDIRECT_SYMBOL_ABS;
}
break;
}
// section types that carry no indirect symbol pointers: nothing to do
case S_ZEROFILL:
case S_MOD_INIT_FUNC_POINTERS:
case S_MOD_TERM_FUNC_POINTERS:
case S_REGULAR:
case S_CSTRING_LITERALS:
case S_4BYTE_LITERALS:
case S_8BYTE_LITERALS:
case S_16BYTE_LITERALS:
break;
default:
if(mode != RELOC_USERLAND) {
die("unrecognized section type %02x", type);
}
}
}
/*
 * Relocate `load` using the classic LC_DYSYMTAB relocation tables: first the
 * image-wide local and external tables, then, for every section of every
 * segment, its indirect symbol pointers followed by its per-section
 * relocation table.
 */
static void relocate_with_symtab(struct binary *load, enum reloc_mode mode, lookupsym_t lookup_sym, void *context, addr_t slide) {
    const bool want_local = !(mode == RELOC_EXTERN_ONLY || mode == RELOC_USERLAND);
    const bool want_extern = !(mode == RELOC_LOCAL_ONLY);

    if(want_local) {
        relocate_area(load, load->mach->dysymtab->locreloff, load->mach->dysymtab->nlocrel, mode, lookup_sym, context, slide);
    }
    if(want_extern) {
        relocate_area(load, load->mach->dysymtab->extreloff, load->mach->dysymtab->nextrel, mode, lookup_sym, context, slide);
    }

    CMD_ITERATE(b_mach_hdr(load), cmd) {
        MACHO_SPECIALIZE(
        if(cmd->cmd == LC_SEGMENT_X) {
            segment_command_x *segment = (void *) cmd;
            // the section headers directly follow the segment command
            section_x *sections = (void *) (segment + 1);
            for(uint32_t n = 0; n < segment->nsects; n++) {
                section_x *cur = &sections[n];
                go_indirect(load, cur->offset, cur->size, cur->flags, cur->reserved1, cur->reserved2, mode, lookup_sym, context, slide);
                relocate_area(load, cur->reloff, cur->nreloc, mode, lookup_sym, context, slide);
            }
        }
        )
    }
}
// Interpret one dyld bind opcode stream (`opcodes`) for `load`.
//
// The stream is a small state machine: opcodes set the current symbol name,
// bind type, addend and segment/offset, and the DO_BIND* opcodes write the
// resolved symbol address at the current location.  Symbols are resolved with
// lookup_symbol_or_do_stuff(lookup_sym, context, sym, weak, userland).  When a
// bind has been performed, the bytes of its DO_BIND* opcode are overwritten in
// place with BIND_OPCODE_SET_TYPE_IMM; when the symbol resolves to 0 the
// location is skipped and the opcode bytes are left as they were.
static void do_bind_section(prange_t opcodes, struct binary *load, bool weak, bool userland, lookupsym_t lookup_sym, void *context) {
uint8_t pointer_size = b_pointer_size(load);
// NOTE(review): symbol_flags is assigned below but never read in this function.
uint8_t symbol_flags;
char *sym = NULL;
uint8_t type = BIND_TYPE_POINTER;
addr_t addend = 0;
// file data and vm address of the segment selected by the last
// SET_SEGMENT_AND_OFFSET opcode; `offset` is relative to its start
prange_t segment = {NULL, 0};
addr_t segaddr = 0;
addr_t offset = 0;
void *ptr = opcodes.start, *end = ptr + opcodes.size;
while(ptr != end) {
// remember where this opcode starts so it can be blanked after binding
void *orig_ptr = ptr;
uint8_t byte = read_int(&ptr, end, uint8_t);
uint8_t immediate = byte & BIND_IMMEDIATE_MASK;
uint8_t opcode = byte & BIND_OPCODE_MASK;
addr_t count, stride;
switch(opcode) {
case BIND_OPCODE_DONE:
case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
// do nothing
break;
case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
// the operand is consumed but its value is ignored
read_uleb128(&ptr, end);
break;
case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
sym = read_cstring(&ptr, end);
symbol_flags = immediate;
break;
case BIND_OPCODE_SET_TYPE_IMM:
type = immediate;
break;
case BIND_OPCODE_SET_ADDEND_SLEB:
addend = read_sleb128(&ptr, end);
break;
case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
if(immediate >= load->nsegments) {
die("segment too high");
}
segment = rangeconv_off(load->segments[immediate].file_range, MUST_FIND);
segaddr = load->segments[immediate].vm_range.start;
offset = read_uleb128(&ptr, end);
break;
case BIND_OPCODE_ADD_ADDR_ULEB:
{
addr_t o = read_uleb128(&ptr, end);
offset += o;
}
break;
// The four DO_BIND variants differ only in how many locations are bound
// (count) and how far the offset advances after each one (stride).
case BIND_OPCODE_DO_BIND:
count = 1;
stride = pointer_size;
goto bind;
case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
count = 1;
stride = read_uleb128(&ptr, end) + pointer_size;
goto bind;
case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
count = 1;
stride = immediate * pointer_size + pointer_size;
goto bind;
case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
count = read_uleb128(&ptr, end);
stride = read_uleb128(&ptr, end) + pointer_size;
goto bind;
bind: {
if(!sym || !segment.start) die("improper bind");
bool _64b;
addr_t value;
value = lookup_symbol_or_do_stuff(lookup_sym, context, sym, weak, userland);
if(!value) {
// unresolved: step over the locations and keep the opcode intact
offset += stride * count;
break;
}
value += addend;
// decide the width of the store (and adjust the value for pc-relative binds)
switch(type) {
case BIND_TYPE_POINTER:
_64b = pointer_size == 8;
break;
case BIND_TYPE_TEXT_ABSOLUTE32:
_64b = false;
break;
case BIND_TYPE_TEXT_PCREL32:
_64b = false;
value = -value + (segaddr + offset + 4);
break;
default:
die("bad bind type %d", (int) type);
}
// every store must lie inside the segment's file data, and the stride must
// be at least as wide as the store
if(offset >= segment.size ||
stride < (_64b ? sizeof(uint64_t) : sizeof(uint32_t)) ||
(segment.size - offset) / stride < count) {
die("bad address while binding");
}
while(count--) {
if(_64b) {
*((uint64_t *) (segment.start + offset)) = value;
} else {
*((uint32_t *) (segment.start + offset)) = value;
}
offset += stride;
if(type == BIND_TYPE_TEXT_PCREL32) value += stride;
}
// blank out the opcode (and its operands) that was just executed, then
// reset the type to the default
memset(orig_ptr, BIND_OPCODE_SET_TYPE_IMM, ptr - orig_ptr);
type = BIND_TYPE_POINTER;
break;
}
default:
die("unknown bind opcode 0x%x", (int) opcode);
}
}
}
/*
 * Interpret the dyld rebase opcode stream `opcodes` for `load`, adding `slide`
 * to every location it designates.  Processing stops at REBASE_OPCODE_DONE or
 * at the end of the stream.  Malformed input (unknown opcode or type, segment
 * index out of range, location outside the segment's file data) die()s.
 */
static void do_rebase(struct binary *load, prange_t opcodes, addr_t slide) {
    uint8_t pointer_size = b_pointer_size(load);
    uint8_t type = REBASE_TYPE_POINTER;
    addr_t offset = 0;
    prange_t segment = {NULL, 0};
    void *ptr = opcodes.start, *end = ptr + opcodes.size;
    while(ptr != end) {
        uint8_t byte = read_int(&ptr, end, uint8_t);
        // NOTE(review): the BIND_* masks are used to split the rebase opcode
        // byte; presumably they equal the REBASE_* masks - confirm.
        uint8_t immediate = byte & BIND_IMMEDIATE_MASK;
        uint8_t opcode = byte & BIND_OPCODE_MASK;
        addr_t count, stride;
        switch(opcode) {
        // this code is very similar to do_bind_section
        case REBASE_OPCODE_DONE:
            return;
        case REBASE_OPCODE_SET_TYPE_IMM:
            type = immediate;
            break;
        case REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
            if(immediate >= load->nsegments) {
                die("segment too high");
            }
            segment = rangeconv_off(load->segments[immediate].file_range, MUST_FIND);
            offset = read_uleb128(&ptr, end);
            break;
        case REBASE_OPCODE_ADD_ADDR_ULEB:
            offset += read_uleb128(&ptr, end);
            break;
        case REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
            offset += immediate * pointer_size;
            break;
        // The DO_REBASE variants differ only in the number of locations
        // (count) and the distance between consecutive ones (stride).
        case REBASE_OPCODE_DO_REBASE_IMM_TIMES:
            count = immediate;
            stride = pointer_size;
            goto rebase;
        case REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
            count = read_uleb128(&ptr, end);
            stride = pointer_size;
            goto rebase;
        case REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
            count = 1;
            stride = read_uleb128(&ptr, end) + pointer_size;
            goto rebase;
        case REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
            count = read_uleb128(&ptr, end);
            stride = read_uleb128(&ptr, end) + pointer_size;
            goto rebase;
        rebase: {
            bool _64b;
            switch(type) {
            case REBASE_TYPE_POINTER:
                _64b = pointer_size == 8;
                break;
            case REBASE_TYPE_TEXT_ABSOLUTE32:
            case REBASE_TYPE_TEXT_PCREL32:
                _64b = false;
                break;
            default:
                die("bad rebase type %d", (int) type);
            }
            // The stride comes from untrusted ULEB data and may have wrapped
            // to 0 or to less than the store width; reject that before it is
            // used as a divisor (same check as in do_bind_section).
            size_t width = _64b ? sizeof(uint64_t) : sizeof(uint32_t);
            if(offset >= segment.size ||
               stride < width ||
               (segment.size - offset) / stride < count) {
                die("bad address while rebasing");
            }
            while(count--) {
                if(_64b) {
                    *((uint64_t *) (segment.start + offset)) += slide;
                } else {
                    uint32_t *word = segment.start + offset;
                    *word += slide;
                    if(type == REBASE_TYPE_TEXT_PCREL32) {
                        // WTF!? This is actually what dyld does.
                        *word = -*word;
                    }
                }
                offset += stride;
            }
            break;
        }
        default:
            die("unknown rebase opcode 0x%x", (int) opcode);
        }
    }
}
/*
 * Relocate `load` using its LC_DYLD_INFO opcode streams.
 *
 * Unless mode is RELOC_EXTERN_ONLY or slide is 0, the rebase stream is applied
 * and then disabled by zeroing rebase_size.  Unless mode is RELOC_LOCAL_ONLY,
 * the bind, weak-bind and lazy-bind streams are processed in that order.  A
 * stream whose offset field is 0 is treated as empty.
 */
static void relocate_with_dyld_info(struct binary *load, enum reloc_mode mode, lookupsym_t lookup_sym, void *context, addr_t slide) {
    // It gets more complicated
    struct dyld_info_command *info = load->mach->dyld_info;

    if(mode != RELOC_EXTERN_ONLY && slide != 0) {
        prange_t rebase_ops = {NULL, 0};
        if(info->rebase_off) {
            rebase_ops = rangeconv_off((range_t) {load, info->rebase_off, info->rebase_size}, MUST_FIND);
        }
        do_rebase(load, rebase_ops, slide);
        info->rebase_size = 0;
    }

    if(mode == RELOC_LOCAL_ONLY) {
        return;
    }

    // locate all three streams before any of them is interpreted
    prange_t bind_ops = {NULL, 0};
    if(info->bind_off) {
        bind_ops = rangeconv_off((range_t) {load, info->bind_off, info->bind_size}, MUST_FIND);
    }
    prange_t weak_ops = {NULL, 0};
    if(info->weak_bind_off) {
        weak_ops = rangeconv_off((range_t) {load, info->weak_bind_off, info->weak_bind_size}, MUST_FIND);
    }
    prange_t lazy_ops = {NULL, 0};
    if(info->lazy_bind_off) {
        lazy_ops = rangeconv_off((range_t) {load, info->lazy_bind_off, info->lazy_bind_size}, MUST_FIND);
    }

    const bool is_userland = (mode == RELOC_USERLAND);
    do_bind_section(bind_ops, load, is_userland, is_userland, lookup_sym, context);
    do_bind_section(weak_ops, load, true, is_userland, lookup_sym, context);
    do_bind_section(lazy_ops, load, is_userland, is_userland, lookup_sym, context);
}
/*
 * Relocate `load` according to `mode`.
 *
 * If `target` is non-NULL, die()s when any segment of `load` (shifted by
 * `slide`) overlaps a segment of `target`.  The relocation itself uses the
 * dyld-info opcode streams when `load` has them and the symtab relocation
 * tables otherwise.  Afterwards, unless mode is RELOC_EXTERN_ONLY or slide is
 * 0, the vmaddr of every segment and the addr of every section in load's
 * header are moved by `slide`.  A non-zero slide is rejected in
 * RELOC_USERLAND mode.
 */
void b_relocate(struct binary *load, const struct binary *target, enum reloc_mode mode, lookupsym_t lookup_sym, void *context, addr_t slide) {
    if(slide != 0 && mode == RELOC_USERLAND) {
        die("sliding is not supported in userland mode");
    }
    if(!load->mach->symtab || !load->mach->dysymtab) {
        die("no LC_SYMTAB/LC_DYSYMTAB");
    }

    // check for overlap
    if(target != NULL) {
        for(uint32_t li = 0; li < load->nsegments; li++) {
            struct data_segment *mine = &load->segments[li];
            for(uint32_t ti = 0; ti < target->nsegments; ti++) {
                struct data_segment *theirs = &target->segments[ti];
                // distance from the (slid) start of `mine` to the start of
                // `theirs`; the two tests cover either segment starting
                // inside the other
                addr_t diff = theirs->vm_range.start - (mine->vm_range.start + slide);
                if(diff < mine->vm_range.size || -diff < theirs->vm_range.size) {
                    die("segments of load and target overlap; load:%llx+%zu target:%llx+%zu", (uint64_t) mine->vm_range.start, mine->vm_range.size, (uint64_t) theirs->vm_range.start, theirs->vm_range.size);
                }
            }
        }
    }

    if(load->mach->dyld_info) {
        relocate_with_dyld_info(load, mode, lookup_sym, context, slide);
    } else {
        relocate_with_symtab(load, mode, lookup_sym, context, slide);
    }

    if(mode == RELOC_EXTERN_ONLY || slide == 0) {
        return;
    }

    // record the new addresses in the load commands
    CMD_ITERATE(b_mach_hdr(load), cmd) {
        MACHO_SPECIALIZE(
        if(cmd->cmd == LC_SEGMENT_X) {
            segment_command_x *segment = (void *) cmd;
            section_x *sections = (void *) (segment + 1);
            segment->vmaddr += slide;
            for(uint32_t n = 0; n < segment->nsects; n++) {
                sections[n].addr += slide;
            }
        }
        )
    }
}

17
data/mach-o/link.h Normal file
View File

@ -0,0 +1,17 @@
#pragma once
#include "binary.h"
typedef addr_t (*lookupsym_t)(void *context, const char *sym);
// Selects which relocations b_relocate() applies.
enum reloc_mode {
// both local (slide) and external (symbol lookup) relocations are applied
RELOC_DEFAULT,
// only local relocations; external symbol references are left untouched
RELOC_LOCAL_ONLY,
// only external symbol references; local relocations and rebasing are skipped
RELOC_EXTERN_ONLY,
// external references only, and symbols that cannot be found are left
// unresolved instead of being an error; b_relocate requires slide == 0
RELOC_USERLAND
};
__BEGIN_DECLS
void b_relocate(struct binary *load, const struct binary *target /* can be null to not check for overlap */, enum reloc_mode mode, lookupsym_t lookup_sym, void *context, addr_t slide);
__END_DECLS

View File

@ -0,0 +1,56 @@
#pragma once
#include <stdint.h>
// ld64
static addr_t read_xleb128(void **ptr, void *end, bool is_signed) {
addr_t result = 0;
uint8_t *p = *ptr;
uint8_t bit;
unsigned int shift = 0;
do {
if(p >= (uint8_t *) end) die("uleb128 overrun");
bit = *p++;
addr_t k = bit & 0x7f;
// 0x0051 BIND_OPCODE_ADD_ADDR_ULEB(0xFFFFFFF8)
// the argument is a lie, it's actually 64 bits of fff, which overflows here
// it should just be sleb, but ...
//if(shift >= 8*sizeof(addr_t) || ((k << shift) >> shift) != k) die("uleb128 too big");
if(shift < sizeof(addr_t) * 8) {
result |= k << shift;
}
shift += 7;
} while(bit & 0x80);
if(is_signed && (bit & 0x40)) {
result |= ~(((addr_t) 0) << shift);
}
*ptr = p;
return result;
}
// Decode an LEB128 value at *ptr via read_xleb128 with is_signed = false and
// advance *ptr past it.
static addr_t read_uleb128(void **ptr, void *end) {
return read_xleb128(ptr, end, false);
}
// Decode an LEB128 value at *ptr via read_xleb128 with is_signed = true and
// advance *ptr past it.  Marked unused so that including this header without
// calling the function does not produce a warning.
__attribute__((unused)) static addr_t read_sleb128(void **ptr, void *end) {
return read_xleb128(ptr, end, true);
}
/*
 * Consume `size` bytes from the cursor *ptr: returns a pointer to the first
 * of them and advances *ptr by `size`.  die()s if fewer than `size` bytes
 * remain before `end`.
 */
static inline void *read_bytes(void **ptr, void *end, size_t size) {
    char *cursor = *ptr;
    size_t remaining = (size_t) ((char *) end - cursor);
    if(remaining < size) {
        die("too big");
    }
    *ptr = cursor + size;
    return cursor;
}
// Read one value of type `typ` at the cursor: read_bytes() bounds-checks
// against `end` and advances *ptr by sizeof(typ), and the result is
// dereferenced as a `typ`.
// NOTE(review): the access is a plain cast-and-dereference with no alignment
// handling - presumably only used with byte-sized or suitably aligned types.
#define read_int(ptr, end, typ) *((typ *) read_bytes(ptr, end, sizeof(typ)))
/*
 * Consume a NUL-terminated string from the cursor *ptr: returns a pointer to
 * its first character and advances *ptr past the terminator.  die()s if no
 * terminator is found before `end`.
 */
static inline char *read_cstring(void **ptr, void *end) {
    // could use strnlen...
    char *const first = *ptr;
    char *const limit = end;
    for(char *cur = first; cur != limit; cur++) {
        if(*cur == '\0') {
            *ptr = cur + 1;
            return first;
        }
    }
    die("c string overflow");
}