ios-kernel-patch/grapher.cpp

772 lines
28 KiB
C++
Raw Normal View History

2011-06-05 06:12:56 +07:00
#include <data/mach-o/binary.h>
2011-06-27 09:11:21 +07:00
#include <data/find.h>
2011-06-05 06:12:56 +07:00
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <list>
#include <tr1/unordered_map>
2011-06-05 06:12:56 +07:00
#include <map>
#include <set>
#include <iostream>
#include <algorithm>
#include <vector>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <math.h>
using namespace std;
using namespace std::tr1;
2011-06-05 06:12:56 +07:00
2011-06-27 09:11:21 +07:00
enum {
FULL_HASH,
BEGINNING_HASH,
ENDING_HASH
};
static int hashMode = FULL_HASH;
2011-06-05 06:12:56 +07:00
struct Edge;
// todo: after changing data, implement this there
static bool b_in_vmrange(const struct binary *binary, addr_t addr) {
CMD_ITERATE(binary->mach->hdr, cmd) {
if(cmd->cmd == LC_SEGMENT) {
auto s = (struct segment_command *) cmd;
if(addr - s->vmaddr < s->vmsize) return true;
}
}
return false;
}
static uint32_t signExtend(uint32_t val, int bits) {
if(val & (1 << (bits - 1))) {
val |= ~((1 << bits) - 1);
}
return val;
}
enum {
2011-06-27 09:11:21 +07:00
INCOMPLETE_FUNC = 0,
2011-06-05 06:12:56 +07:00
THUMB_FUNC,
THUMB_VARARGS_FUNC,
THUMB_ACCESSOR_FUNC
};
// misnomer-- it's actually any symbol
struct Function {
vector<Edge *> forward, backward;
uint16_t *start, *end, *endOfWorld;
addr_t startAddr;
const char *name;
uint32_t hash;
2011-06-27 09:11:21 +07:00
list<pair<pair<addr_t, addr_t>, bool>> refs;// the bool is whether it's a function
2011-06-05 06:12:56 +07:00
int type;
Function(uint16_t *start, uint16_t *end, uint16_t *endOfWorld, addr_t startAddr, const char *name, int type)
: start(start), end(end), endOfWorld(endOfWorld), startAddr(startAddr), name(name), type(type) {
if(start && !end) {
parse();
}
setHash();
}
void parse() {
auto addr = startAddr & ~1;
auto knownEnd = start;
uint16_t *p;
for(p = start; p + 2 <= endOfWorld && p < start + 0x400; p++, addr += 2) {
uint32_t jumpTarget = 0;
if((p[0] & 0xf000) == 0xd000) { // B1
jumpTarget = signExtend(p[0] & 0xff, 8);
2011-06-27 09:11:21 +07:00
p[0] = 0x42;
2011-06-05 06:12:56 +07:00
} else if((p[0] & 0xf800) == 0xe000) { // B2
jumpTarget = signExtend(p[0] & 0x7ff, 11);
2011-06-27 09:11:21 +07:00
p[0] = 0x43;
2011-06-05 06:12:56 +07:00
} else if((p[0] & 0xf800) == 0xf000 && ((p[1] & 0xd000) == 0x8000) && ((p[0] & 0x380) >> 7) != 7) { // B3
jumpTarget = signExtend(((p[0] & 0x400) << 9) | ((p[1] & 0x800) << 7) | ((p[1] & 0x2000) << 4) | ((p[0] & 0x3f) << 11) | (p[1] & 0x7ff), 20);
2011-06-27 09:11:21 +07:00
p[0] = p[1] = 0x44;
2011-06-05 06:12:56 +07:00
} else if((p[0] & 0xf500) == 0xb100) { // CB[N]Z
jumpTarget = ((p[0] & 0x200) >> 4) | ((p[0] & 0xf8) >> 3);
} else if((p[0] & 0xf800) == 0x4800) { // LDR literal
auto target = (uint32_t *) (p + ((addr & 2) ? 1 : 2) + 2*(p[0] & 0xff));
if(target < (uint32_t *) endOfWorld && *target) {
2011-06-27 09:11:21 +07:00
refs.push_back(make_pair(make_pair(addr, *target), false));
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
p[0] = 0x45;
2011-06-05 06:12:56 +07:00
} else if((p[0] & 0xff7f) == 0xf85f) { // LDR literal 2
auto target = (uint32_t *) ((uint8_t *) p + ((addr & 2) ? 2 : 4) + (p[1] & 0xfff));
if(target < (uint32_t *) endOfWorld && *target) {
2011-06-27 09:11:21 +07:00
refs.push_back(make_pair(make_pair(addr, *target), false));
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
p[0] = p[1] = 0x46;
2011-06-05 06:12:56 +07:00
} else if((p[0] & 0xf800) == 0xf000 && (p[1] & 0xc000) == 0xc000) { // BL(X)
// gross
auto S = ((p[0] & 0x400) >> 10), J1 = (p[1] & 0x2000) >> 13, J2 = (p[1] & 0x800) >> 11;
auto I1 = ~(J1 ^ S) & 1, I2 = ~(J2 ^ S) & 1;
auto diff = ((p[0] & 0x400) >> 14) | (I1 << 23) | (I2 << 22) | ((p[0] & 0x3ff) << 12) | ((p[1] & 0x7ff) << 1);
diff = signExtend(diff, 24);
if(diff & 0x800000) diff |= 0xff000000;
auto target = addr + diff + 4;
if(p[1] & 0x1000) { // BL
target |= 1;
} else { // BLX
target &= ~2;
}
if(target)
refs.push_back(make_pair(make_pair(addr, target), true));
2011-06-05 06:12:56 +07:00
2011-06-27 09:11:21 +07:00
p[0] = p[1] = 0x46;
2011-06-05 06:12:56 +07:00
} else if(
(type == THUMB_FUNC && p[0] == (0xbd00 | (start[0] & 0xff))) ||
2011-06-27 09:11:21 +07:00
(type == THUMB_VARARGS_FUNC && (p[0] & 0xb000) == 0xb000 && p[1] == 0x4770)) { // end of function
2011-06-05 06:12:56 +07:00
jumpTarget = UINT32_MAX;
}
/*if(startAddr == 0x800632bd) {
fprintf(stderr, "%x -> %x\n", startAddr + 2*(p - start), jumpTarget);
}*/
if(jumpTarget) {
if(jumpTarget != UINT32_MAX) {
auto newEnd = p + 2 + jumpTarget;
if(newEnd < endOfWorld && newEnd > knownEnd) knownEnd = newEnd;
}
if(p >= knownEnd) break;
}
if(((p[0] >> 13) & 0b111) == 0b111 && ((p[0] >> 11) & 0b11) != 0b00) {
// 32-bit
p++, addr += 2;
}
}
end = p;
}
void setHash() {
2011-06-27 09:11:21 +07:00
auto hstart = start;
auto hend = end;
switch(hashMode) {
case BEGINNING_HASH:
hstart = start;
hend = hstart + 7;
break;
case ENDING_HASH:
hend = end;
hstart = hend - 7;
break;
}
if(hstart < start) hstart = start;
if(hend > end) hend = end;
for(auto p = hstart; p + 1 <= hend; p++) {
2011-06-05 06:12:56 +07:00
hash += *p;
}
}
double predict(const Function *other) const {
size_t myLength = end - start, hisLength = other->end - other->start;
if(type != other->type) {
return 0.0;
}
if(type == INCOMPLETE_FUNC) {
return 0.5;
}
if(myLength <= 7 && hisLength <= 7) {
return (myLength == hisLength && !memcmp(start, other->start, myLength)) ? 1.0 : 0.0;
}
double failed = 0;
for(auto p = start; p < end;) {
size_t bestBitLength = 1;
for(auto q = other->start; q < other->end; q++) {
size_t i = 0;
for(i = 0; q + i < other->end && p + i < end; i++) {
if(q[i] != p[i]) break;
}
if(i > bestBitLength) {
bestBitLength = i;
}
}
if(bestBitLength < 5) failed += bestBitLength;
p += bestBitLength;
}
return 1.0 - (failed / myLength);
}
};
struct Edge {
Function *source, *dest;
uint32_t hash;
Edge(Function *source, Function *dest)
: source(source), dest(dest) {
source->forward.push_back(this);
dest->backward.push_back(this);
}
2011-06-27 09:11:21 +07:00
inline void setHash() {
hash = source->hash ^ ((dest->hash >> 16) | (dest->hash << 16));
}
2011-06-05 06:12:56 +07:00
};
// this was a lambda, then I tried to compile with clang
static bool compareStartAddr(Function *const& a, Function *const& b) {
return a->startAddr < b->startAddr;
}
2011-06-05 06:12:56 +07:00
struct Binary {
struct binary binary;
const char *filename;
unordered_map<addr_t, Function *> funcs;
2011-06-27 09:11:21 +07:00
unordered_map<uint32_t, vector<Function *>> funcsByHash;
2011-06-05 06:12:56 +07:00
map<string, Function *> funcsByName;
2011-06-27 09:11:21 +07:00
vector<Function *> funcsList;
unordered_map<uint32_t, vector<Edge *>> edgesByHash;
2011-06-05 06:12:56 +07:00
unordered_map<addr_t, const char *> reverseSymbols;
Binary(const char *filename)
: filename(filename) {
b_init(&binary);
b_load_macho(&binary, filename);
doFuncs();
doSymbols();
}
void doSymbols() {
for(uint32_t i = 0; i < binary.mach->nsyms; i++) {
struct nlist *nl = binary.mach->symtab + i;
if(nl->n_value && (uint32_t) nl->n_un.n_strx < binary.mach->strsize) {
auto it = funcs.find(nl->n_value | ((nl->n_desc & N_ARM_THUMB_DEF) ? 1 : 0));
if(it != funcs.end()) {
setFuncName(it->second, binary.mach->strtab + nl->n_un.n_strx);
}
}
}
}
void setFuncName(Function *func, const char *name) {
// not quite right
if(func->name) {
auto it = funcsByName.find(func->name);
if(it != funcsByName.end() && it->second == func) funcsByName.erase(it);
}
func->name = name;
if(name) funcsByName[name] = func;
}
Function *addFunc(uint16_t *start, uint16_t *end, addr_t addr, int type) {
Function *&func = funcs[addr];
if(!func) {
func = new Function(start, end, (uint16_t *) ((char *) binary.valid_range.start + binary.valid_range.size), addr, reverseSymbols[addr], type);
2011-06-05 06:12:56 +07:00
funcsList.push_back(func);
}
return func;
}
2011-06-27 09:11:21 +07:00
void cut(const set<uint32_t>& cutPoints, bool explain) {
2011-06-05 06:12:56 +07:00
uint32_t x = 0;
for(auto func : funcsList) {
uint32_t hash = func->hash;
2011-06-27 09:11:21 +07:00
func->hash ^= x;
//func->hash = x;
//printf("%08x: %x -> %x\n", func->startAddr, hash, func->hash);
2011-06-05 06:12:56 +07:00
if(cutPoints.find(hash) != cutPoints.end()) {
2011-06-27 09:11:21 +07:00
if(explain) printf("CUT: hash %x from %x\n", hash, func->startAddr);
x = (hash << 2);
2011-06-05 06:12:56 +07:00
}
}
}
void doFuncs() {
auto range = b_macho_segrange(&binary, "__TEXT");
auto pr = rangeconv(range, MUST_FIND);
auto start = (uint16_t *) pr.start, end = start + pr.size/2;
auto addr = range.start;
for(uint16_t *p = start; p + 4 <= end; p++, addr += 2) {
if((p[0] == 0xb40f || p[0] == 0xb40c) && (p[1] & 0xff00) == 0xb500 && (p[2] & 0xff00) == 0xaf00) {
addFunc(p, NULL, addr | 1, THUMB_VARARGS_FUNC);
} else if((p[0] & 0xff00) == 0xb500 && (p[1] & 0xff00) == 0xaf00) {
addFunc(p, NULL, addr | 1, THUMB_FUNC);
} else if((p[0] & 0xf83f) == 0x6800 && p[1] == 0x4770) {
addFunc(p, p + 2, addr | 1, THUMB_ACCESSOR_FUNC);
}
}
// can't use for( : ) because it will be mutated
for(auto i = 0u; i < funcsList.size(); i++) {
auto func = funcsList[i];
2011-06-05 06:12:56 +07:00
for(auto p : func->refs) {
2011-06-27 09:11:21 +07:00
auto b = p.first.second;
auto executable = p.second;
2011-06-05 06:12:56 +07:00
if(b_in_vmrange(&binary, b)) {
Function *&func2 = funcs[b];
if(!func2) {
prange_t pr = rangeconv((range_t) {&binary, b, 4}, 0);
2011-06-27 09:11:21 +07:00
if(!pr.start || !executable) {
//value = *((uint32_t *) pr.start),
//(/* hack */ true || b_in_vmrange(&binary, value)))) {
2011-06-05 06:12:56 +07:00
// quick guess
pr.size = 0;
}
func2 = addFunc((uint16_t *) pr.start, (uint16_t *) ((char *) pr.start + pr.size), b, INCOMPLETE_FUNC);
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
new Edge(func, func2);
2011-06-05 06:12:56 +07:00
}
}
}
2011-06-27 09:11:21 +07:00
for(auto func : funcsList) {
#define X(direction, port) \
if(hashMode == FULL_HASH && false) { \
for(auto edge : func->direction) { \
func->hash ^= ~(edge->port->hash); \
} \
} else { \
if(func->direction.size() == 1) { \
func->hash += ~(*func->direction.begin())->port->hash; \
} \
}
X(forward, dest)
X(backward, source)
#undef X
}
sort(funcsList.begin(), funcsList.end(), compareStartAddr);
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
void doHashes() {
2011-06-05 06:12:56 +07:00
if(edgesByHash.size()) return;
for(auto func : funcsList) {
2011-06-27 09:11:21 +07:00
funcsByHash[func->hash].push_back(func);
for(auto edge : func->forward) {
edge->setHash();
edgesByHash[edge->hash].push_back(edge);
2011-06-05 06:12:56 +07:00
}
}
}
2011-06-27 09:11:21 +07:00
unordered_map<uint32_t, list<Function *>> getFuncsByHash() {
typeof(getFuncsByHash()) result;
for(auto func : funcsList) {
result[func->hash].push_back(func);
}
return result;
}
2011-06-05 06:12:56 +07:00
void identifyVtables(bool explain) {
2011-06-27 09:11:21 +07:00
doHashes();
2011-06-05 06:12:56 +07:00
auto constructor = funcsByName["__ZN11OSMetaClassC2EPKcPKS_j"];
assert(constructor);
unordered_map<addr_t, const char *> metaClasses;
for(auto edge : constructor->backward) {
2011-07-13 05:13:35 +07:00
auto nameAddr = edge->source->refs.begin()->first.second;
2011-06-05 06:12:56 +07:00
if(!nameAddr) continue;
// xxx
auto className = (const char *) rangeconv((range_t) {&binary, nameAddr, 128}, 0).start;
if(!className) continue;
if(edge->source->backward.size() != 1) continue;
auto mcInstantiator = (*edge->source->backward.begin())->source;
addr_t metaClass;
auto it = mcInstantiator->refs.begin();
for(it++; it != mcInstantiator->refs.end(); it++) {
2011-07-13 05:13:35 +07:00
if(it->first.second == edge->source->startAddr) {
2011-06-05 06:12:56 +07:00
auto it2 = it;
it2--;
2011-07-13 05:13:35 +07:00
metaClass = it2->first.second;
2011-06-05 06:12:56 +07:00
goto ok;
}
}
continue;
ok:
2011-07-13 05:13:35 +07:00
if(explain) printf("ok %s\n", className);
2011-06-05 06:12:56 +07:00
metaClasses[metaClass] = className;
}
auto constructed = funcsByName["__ZNK11OSMetaClass19instanceConstructedEv"];
for(auto edge : constructed->backward) {
if(edge->source->refs.size() == 4) {
auto it = edge->source->refs.begin();
2011-07-13 05:13:35 +07:00
auto metaClass = it->first.second;
2011-06-05 06:12:56 +07:00
it++; it++;
2011-07-13 05:13:35 +07:00
auto vtable = it->first.second - 8;
2011-06-05 06:12:56 +07:00
auto className = metaClasses[metaClass];
if(!className) continue;
if(explain) printf("%x: %s\n", metaClass, className);
auto func = addFunc(NULL, NULL, vtable, INCOMPLETE_FUNC);
char funcName[128];
snprintf(funcName, sizeof(funcName), "__ZTV%zd%s", strlen(className), className);
setFuncName(func, funcName);
}
}
//for(auto edge : func->backward) {
}
void injectSymbols(const char *output) {
// need a fresh copy that's not normalized
struct binary binary;
b_init(&binary);
b_load_macho(&binary, filename);
char *str = binary.mach->strtab + 4;
struct nlist *nl = binary.mach->symtab;
for(auto p : funcsByName) {
if(nl - binary.mach->symtab >= binary.mach->nsyms) {
fprintf(stderr, "symbol overflow\n");
break;
}
memset(nl, 0, sizeof(*nl));
nl->n_un.n_strx = str - binary.mach->strtab;
nl->n_value = p.second->startAddr & ~1;
if(p.second->startAddr & 1) {
nl->n_desc |= N_ARM_THUMB_DEF;
}
auto size = p.first.size();
if(str + size >= binary.mach->strtab + binary.mach->strsize) {
fprintf(stderr, "string overflow at %s\n", p.first.c_str());
abort();
}
strlcpy(str, p.first.data(), size + 1);
str += size + 1;
nl++;
}
CMD_ITERATE(binary.mach->hdr, cmd) {
if(cmd->cmd == LC_SYMTAB) {
auto s = (struct symtab_command *) cmd;
s->nsyms = nl - binary.mach->symtab;
} else if(cmd->cmd == LC_DYSYMTAB) {
auto d = (struct dysymtab_command *) cmd;
d->iextdefsym = 0;
d->nextdefsym = nl - binary.mach->symtab;
}
}
b_macho_store(&binary, output);
}
};
2011-06-27 09:11:21 +07:00
static void doCutPoints(Binary *ba, Binary *bb, bool explain = false) {
auto funcsByHashA = ba->getFuncsByHash(), funcsByHashB = bb->getFuncsByHash();
2011-06-05 06:12:56 +07:00
set<uint32_t> cutPoints;
2011-06-27 09:11:21 +07:00
for(auto p : funcsByHashA) {
2011-06-05 06:12:56 +07:00
if(p.second.size() == 1 &&
2011-06-27 09:11:21 +07:00
funcsByHashB[p.first].size() == 1) {
2011-06-05 06:12:56 +07:00
cutPoints.insert(p.first);
}
}
2011-06-27 09:11:21 +07:00
if(explain) {
// verify (by hand) that cut points are actually in the same order between the binaries
auto it = ba->funcsList.begin(), it2 = bb->funcsList.begin();
while(it != ba->funcsList.end() && it2 != bb->funcsList.end()) {
while(cutPoints.find((*it)->hash) == cutPoints.end()) if(++it == ba->funcsList.end()) goto done;
while(cutPoints.find((*it2)->hash) == cutPoints.end()) if(++it2 == bb->funcsList.end()) goto done;
if((*it)->hash != (*it2)->hash) printf("XXX ");
printf("%08x:%08x %x:%x\n", (*it)->startAddr, (*it2)->startAddr, (*it)->hash, (*it2)->hash);
it++; it2++;
}
done:;
}
ba->cut(cutPoints, explain);
bb->cut(cutPoints, explain);
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
static list<pair<Function *, Function *>> doMatch(Binary *ba, Binary *bb, bool explain = false) {
2011-06-05 06:12:56 +07:00
doCutPoints(ba, bb);
2011-06-27 09:11:21 +07:00
ba->doHashes();
bb->doHashes();
2011-06-05 06:12:56 +07:00
// This is not the most efficient thing
unordered_map<Function *, unordered_map<Function *, double>> xs;
unordered_map<Edge *, unordered_map<Edge *, double>> ys;
for(auto a : ba->funcsList) {
for(auto b : bb->funcsByHash[a->hash]) {
double val;
auto forward = a->predict(b), backward = b->predict(a);
val = (forward + backward) / 2;
xs[a][b] = val;
}
}
2011-06-27 09:11:21 +07:00
struct MetaEdge {
double *source;
double weight;
};
struct MetaVertex {
double *dest;
MetaEdge *edges;
};
vector<MetaVertex> mvs;
fprintf(stderr, "5.1\n");
// 5.1
for(auto group : ba->edgesByHash) {
for(auto edge1 : group.second) {
for(auto edge2 : bb->edgesByHash[edge1->hash]) {
MetaVertex mv;
mv.dest = &ys[edge1][edge2];
mv.edges = new MetaEdge[3];
mv.edges[0] = (MetaEdge) {&xs[edge1->source][edge2->source], 1};
mv.edges[1] = (MetaEdge) {&xs[edge1->dest][edge2->dest], 1};
mv.edges[2] = (MetaEdge) {NULL, 0};
mvs.push_back(mv);
}
}
}
fprintf(stderr, "5.2\n");
// 5.2, tweaked to account for order
// not perfect
// but that needs be divided by something
// oh, and we can't completely discard the original xs value because some functions neither call or are called by anyone we know; we still can use matching
for(auto a : ba->funcsList) {
//printf("%d %x\n", (int) bb->funcsByHash[a->hash].size(), a->startAddr);
for(auto b : bb->funcsByHash[a->hash]) {
//printf("welp\n");
MetaVertex mv;
mv.dest = &xs[a][b];
MetaEdge *ptr = mv.edges = new MetaEdge[a->forward.size() * b->forward.size() + a->backward.size() * b->backward.size() + 2];
#define X(direction) \
if(a->direction.size() == b->direction.size()) { \
for(auto it = a->direction.begin(), it2 = b->direction.begin(); \
it != a->direction.end(); \
it++, it2++) { \
*ptr++ = (MetaEdge) {&ys[*it][*it2], 1}; \
} \
} else { \
double multiplier = 1.0 / (a->direction.size() + b->direction.size()); /* not a mistake */ \
for(auto ar : a->direction) { \
for(auto br : b->direction) { \
*ptr++ = (MetaEdge) {&ys[ar][br], multiplier}; \
} \
} \
}
X(forward)
X(backward)
#undef X
*ptr++ = (MetaEdge) {mv.dest, 0.1};
*ptr++ = (MetaEdge) {NULL, 0};
mvs.push_back(mv);
}
}
size_t size = mvs.size();
MetaVertex *mvp = &mvs[0];
2011-06-05 06:12:56 +07:00
for(int iteration = 0; iteration < 6; iteration++) {
2011-06-27 09:11:21 +07:00
fprintf(stderr, "%d (%zd to go)\n", iteration, size);
2011-06-05 06:12:56 +07:00
if(0) {
// debug
printf("--\n");
#define F(addr) \
for(auto p : xs[ba->funcs[addr]]) { \
printf("%x=%x %f\n", addr, p.first->startAddr, p.second); \
} \
if(1) for(auto e : ba->funcs[addr]->backward) { \
for(auto p : ys[e]) { \
printf("%x->%x=%x->%x %f\n", e->source->startAddr, e->dest->startAddr, p.first->source->startAddr, p.first->dest->startAddr, p.second); \
} \
}
F(0x80063890)
}
2011-06-27 09:11:21 +07:00
MetaVertex *mymvp = mvp;
for(size_t i = 0; i < size; i++, mymvp++) {
MetaEdge *edge = mymvp->edges;
double result = 0;
while(edge->source) {
result += edge->weight * *edge->source;
edge++;
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
*mymvp->dest = result;
2011-06-05 06:12:56 +07:00
}
}
list<pair<Function *, Function *>> result;
for(auto p : xs) {
Function *maxFunction = NULL;
2011-06-27 09:11:21 +07:00
if(explain) printf("%08x (%s):\n", p.first->startAddr, p.first->name);
2011-06-05 06:12:56 +07:00
double maxValue = -1;
for(auto p2 : p.second) {
2011-06-27 09:11:21 +07:00
if(explain) printf(" %x=%f (predict:%f,%f)\n", p2.first->startAddr, p2.second, p.first->predict(p2.first), p2.first->predict(p.first));
2011-06-05 06:12:56 +07:00
if(p2.second > maxValue) {
maxValue = p2.second;
maxFunction = p2.first;
}
}
2011-06-27 09:11:21 +07:00
if(explain) printf("Max: %x\n\n", maxFunction ? maxFunction->startAddr : 0);
2011-06-05 06:12:56 +07:00
if(maxFunction) {
result.push_back(make_pair(p.first, maxFunction));
}
}
return result;
}
static list<pair<Function *, Function *>> doMatchTrivially(Binary *ba, Binary *bb) {
if(ba->funcsList.size() != bb->funcsList.size()) {
fprintf(stderr, "funcs list not the same size: %d/%d\n", (int) ba->funcsList.size(), (int) bb->funcsList.size());
}
list<pair<Function *, Function *>> result;
for(auto it = ba->funcsList.begin(), it2 = bb->funcsList.begin();
it != ba->funcsList.end() && it2 != bb->funcsList.end();
it++, it2++) {
result.push_back(make_pair(*it, *it2));
}
return result;
}
int main(__unused int argc, char **argv) {
argv++;
2011-06-27 09:11:21 +07:00
Binary *ba = NULL;
2011-06-05 06:12:56 +07:00
while(auto arg = *argv++)
if(!strncmp(arg, "--", 2)) {
string mode = arg;
2011-06-27 09:11:21 +07:00
if(mode.find("--hash=") == 0) {
string type = mode.substr(7);
if(type == "beginning") hashMode = BEGINNING_HASH;
else if(type == "ending") hashMode = ENDING_HASH;
else if(type == "full") hashMode = FULL_HASH;
} else if(mode == "--list") {
2011-06-05 06:12:56 +07:00
printf("List of funcs:\n");
bool refs = false;
if(*argv && !strcmp(*argv, "--refs")) {
refs = true;
argv++;
}
2011-06-27 09:11:21 +07:00
for(auto func : ba->funcsList) {
2011-06-05 06:12:56 +07:00
printf("%x-%x l=%ld h=%x n=%s f=%d b=%d t=%d\n", func->startAddr, (addr_t) (func->startAddr + 2*(func->end - func->start)), func->end - func->start, func->hash, func->name, (int) func->forward.size(), (int) func->backward.size(), func->type);
if(refs) for(auto ref : func->refs) {
2011-06-27 09:11:21 +07:00
printf(" r:%x->%x (%s)\n", (int) ref.first.first, (int) ref.first.second, ref.second ? "(code)" : "(data)");
2011-06-05 06:12:56 +07:00
}
}
} else if(mode == "--cut") {
Binary bb(*argv++);
2011-06-27 09:11:21 +07:00
bool explain = false;
if(*argv && !strcmp(*argv, "--explain")) {
explain = true;
argv++;
}
doCutPoints(ba, &bb, explain);
2011-06-05 06:12:56 +07:00
} else if(mode == "--byHash") {
printf("List of funcs by hash:\n");
2011-06-27 09:11:21 +07:00
ba->doHashes();
2011-06-05 06:12:56 +07:00
2011-06-27 09:11:21 +07:00
for(auto p : ba->funcsByHash) {
2011-06-05 06:12:56 +07:00
printf("%d - [%08x]:", (int) p.second.size(), p.first);
for(auto func : p.second) {
printf(" %x", func->startAddr);
}
printf("\n");
}
} else if(mode == "--compare") {
Binary bb(*argv++);
2011-06-27 09:11:21 +07:00
for(auto p : ba->reverseSymbols) {
2011-06-05 06:12:56 +07:00
auto myAddr = p.first, otherAddr = b_sym(&bb.binary, p.second, TO_EXECUTE);
2011-06-27 09:11:21 +07:00
auto first = ba->funcs[myAddr], second = bb.funcs[otherAddr];
2011-06-05 06:12:56 +07:00
if(first && second) {
double forward = first->predict(second), backward = second->predict(first);
printf("%.32s (%08x/%08x): %f\n", p.second, myAddr, otherAddr, (forward + backward) / 2);
}
}
} else if(mode == "--matchF" || mode == "--matchB" || mode == "--trivial") {
Binary bb(*argv++);
2011-06-27 09:11:21 +07:00
bool explain = false;
if(*argv && !strcmp(*argv, "--explain")) {
explain = true;
argv++;
}
2011-06-05 06:12:56 +07:00
list<pair<Function *, Function *>> result;
if(mode == "--matchF") {
2011-06-27 09:11:21 +07:00
result = doMatch(ba, &bb, explain);
2011-06-05 06:12:56 +07:00
} else if(mode == "--matchB") {
2011-06-27 09:11:21 +07:00
for(auto p : doMatch(&bb, ba, explain)) result.push_back(make_pair(p.second, p.first));
2011-06-05 06:12:56 +07:00
} else if(mode == "--trivial") {
2011-06-27 09:11:21 +07:00
result = doMatchTrivially(ba, &bb);
2011-06-05 06:12:56 +07:00
}
2011-06-27 09:11:21 +07:00
if(*argv && !strcmp(*argv, "--audit")) {
2011-06-05 06:12:56 +07:00
for(auto p : result) {
2011-06-27 09:11:21 +07:00
const char *trueName = ba->reverseSymbols[p.first->startAddr];
2011-06-05 06:12:56 +07:00
const char *name = bb.reverseSymbols[p.second->startAddr];
if(name && trueName && strcmp(name, trueName)) {
printf("Wrong: %x=%x (%s = %s)\n", p.first->startAddr, p.second->startAddr, trueName, name);
}
}
argv++;
2011-06-27 09:11:21 +07:00
} else if(*argv && !strcmp(*argv, "--show")) {
2011-06-05 06:12:56 +07:00
for(auto p : result) {
auto func1 = p.first, func2 = p.second;
printf("%08x/%08x %f/%f %s/%s\n", func1->startAddr, func2->startAddr, func1->predict(func2), func2->predict(func1), func1->name, func2->name);
}
argv++;
}
for(auto p : result) {
2011-06-27 09:11:21 +07:00
ba->setFuncName(p.first, p.second->name);
2011-06-05 06:12:56 +07:00
}
} else if(mode == "--clear") {
// HACK
2011-06-27 09:11:21 +07:00
for(auto func : ba->funcsList) {
2011-06-05 06:12:56 +07:00
if(func->name && strcmp(func->name, "__ZN11OSMetaClassC2EPKcPKS_j") && strcmp(func->name, "__ZNK11OSMetaClass19instanceConstructedEv"))
2011-06-27 09:11:21 +07:00
ba->setFuncName(func, NULL);
2011-06-05 06:12:56 +07:00
}
} else if(mode == "--vt") {
bool explain = false;
if(*argv && !strcmp(*argv, "--explain")) {
explain = true;
argv++;
}
2011-06-27 09:11:21 +07:00
ba->identifyVtables(explain);
} else if(mode == "--manual") {
const char *name = *argv++;
string mode = *argv++;
addr_t addr;
auto range = b_macho_segrange(&ba->binary, "__TEXT");
if(mode == "strref") {
addr = find_bof(range, find_int32(range, find_string(range, *argv++, 1, MUST_FIND), MUST_FIND), 2);
} else if(mode == "inline-strref") {
addr = find_bof(range, find_string(range, *argv++, 1, MUST_FIND), 2);
} else if(mode == "pattern") {
addr = find_data(range, *argv++, 0, MUST_FIND);
} else {
fprintf(stderr, "? %s\n", mode.c_str());
abort();
}
auto func = ba->funcs[addr];
if(!func) {
fprintf(stderr, "not a Function: %x\n", addr);
abort();
}
ba->setFuncName(func, name);
2011-06-05 06:12:56 +07:00
} else {
fprintf(stderr, "? %s\n", mode.c_str());
abort();
}
2011-06-27 09:11:21 +07:00
} else if(!ba) {
ba = new Binary(arg);
2011-06-05 06:12:56 +07:00
} else {
// write back
2011-06-27 09:11:21 +07:00
ba->injectSymbols(arg);
2011-06-05 06:12:56 +07:00
}
return 0;
}