481 lines
16 KiB
C
481 lines
16 KiB
C
#include "find.h"
|
|
#include "binary.h"
|
|
|
|
// Various links:
|
|
// http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treachery/
|
|
// http://www-igm.univ-mlv.fr/~lecroq/string/tunedbm.html#SECTION00195
|
|
// http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190 (was using this)
|
|
|
|
static addr_t find_data_raw(range_t range, int16_t *buf, ssize_t pattern_size, size_t offset, int align, int options, const char *name) {
|
|
int8_t ps = (int8_t) pattern_size;
|
|
if(ps != pattern_size) {
|
|
die("pattern too long");
|
|
}
|
|
// the problem with this is that it is faster to search for everything at once
|
|
|
|
// reduce inefficiency
|
|
for(int pos = pattern_size - 1; pos >= 0; pos--) {
|
|
if(buf[pos] == -1) {
|
|
pattern_size--;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
int8_t table[256];
|
|
for(int c = 0; c < 256; c++) {
|
|
table[c] = ps;
|
|
}
|
|
for(int8_t pos = 0; pos < ps - 1; pos++) {
|
|
if(buf[pos] == -1) {
|
|
// Unfortunately, we can't put any character past being in this position...
|
|
for(int i = 0; i < 256; i++) {
|
|
table[i] = ps - pos - 1;
|
|
}
|
|
} else {
|
|
table[buf[pos]] = ps - pos - 1;
|
|
}
|
|
}
|
|
|
|
// this can't be -1 due to above
|
|
int8_t shift = table[buf[pattern_size - 1]];
|
|
table[buf[pattern_size - 1]] = 0;
|
|
|
|
// now, for each c, let x be the last position in the string, other than the final position, where c might appear, or -1 if it doesn't appear anywhere; table[i] is size - x - 1.
|
|
// so if we got c but no match, we can skip ahead by table[i]
|
|
// updated
|
|
buf += pattern_size - 1;
|
|
addr_t foundit = 0;
|
|
prange_t pr = rangeconv(range, MUST_FIND);
|
|
uint8_t *start = pr.start + pattern_size - 1;
|
|
uint8_t *end = pr.start + pr.size;
|
|
uint8_t *cutoff = end - 400; // arbitrary
|
|
uint8_t *cursor = start;
|
|
|
|
#define GUTS(keep_going) \
|
|
{ \
|
|
for(int i = 0; i >= (-pattern_size + 1); i--) { \
|
|
if(buf[i] != -1 && cursor[i] != buf[i]) { \
|
|
/* Not a match */ \
|
|
goto keep_going; \
|
|
} \
|
|
} \
|
|
/* Whoa, we found it */ \
|
|
addr_t new_match = cursor - start + range.start; \
|
|
if(align && (new_match & (align - 1))) { \
|
|
/* Just kidding. */ \
|
|
goto keep_going; \
|
|
} \
|
|
if(foundit) { \
|
|
die("found [%s] multiple times in range: first at %08llx then at %08llx", name, (uint64_t) foundit, (uint64_t) new_match); \
|
|
} \
|
|
foundit = new_match; \
|
|
if(align) { \
|
|
goto done; \
|
|
} \
|
|
} \
|
|
/* otherwise, keep searching to make sure we won't find it again */ \
|
|
keep_going: \
|
|
cursor += shift;
|
|
|
|
uint8_t jump;
|
|
|
|
while(1) {
|
|
if(cursor >= cutoff) break;
|
|
do {
|
|
jump = table[*cursor];
|
|
cursor += jump;
|
|
jump = table[*cursor];
|
|
cursor += jump;
|
|
jump = table[*cursor];
|
|
cursor += jump;
|
|
if(cursor >= end) goto done;
|
|
} while(jump);
|
|
GUTS(lbl1)
|
|
}
|
|
if(cursor >= end) goto done;
|
|
while(1) {
|
|
do {
|
|
jump = table[*cursor];
|
|
cursor += jump;
|
|
if(cursor >= end) goto done;
|
|
} while(jump);
|
|
GUTS(lbl2)
|
|
}
|
|
done:
|
|
if(foundit) {
|
|
return foundit + offset;
|
|
} else if(options & MUST_FIND) {
|
|
die("didn't find [%s] in range (%08llx, %zx)", name, (uint64_t) range.start, range.size);
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static void parse_pattern(const char *to_find, int16_t buf[128], ssize_t *pattern_size, ssize_t *offset) {
|
|
*pattern_size = 0;
|
|
*offset = 0;
|
|
autofree char *to_find_ = strdup(to_find);
|
|
while(to_find_) {
|
|
char *bit = strsep(&to_find_, " ");
|
|
if(!strcmp(bit, "-")) {
|
|
*offset = *pattern_size;
|
|
continue;
|
|
} else if(!strcmp(bit, "+")) {
|
|
*offset = *pattern_size + 1;
|
|
continue;
|
|
} else if(!strcmp(bit, "..")) {
|
|
buf[*pattern_size] = -1;
|
|
} else {
|
|
char *endptr;
|
|
buf[*pattern_size] = (int16_t) (strtol(bit, &endptr, 16) & 0xff);
|
|
if(*endptr) {
|
|
die("invalid bit %s in [%s]", bit, to_find);
|
|
}
|
|
}
|
|
if(++*pattern_size >= 128) {
|
|
die("pattern [%s] too big", to_find);
|
|
}
|
|
}
|
|
}
|
|
|
|
addr_t find_data(range_t range, const char *to_find, int align, int options) {
|
|
int16_t buf[128];
|
|
ssize_t pattern_size, offset;
|
|
parse_pattern(to_find, buf, &pattern_size, &offset);
|
|
return find_data_raw(range, buf, pattern_size, offset, align, options, to_find);
|
|
}
|
|
|
|
addr_t find_string(range_t range, const char *string, int align, int options) {
|
|
size_t len = strlen(string);
|
|
autofree int16_t *buf = malloc(sizeof(int16_t) * (len + 2));
|
|
buf[0] = buf[len + 1] = 0;
|
|
for(unsigned int i = 0; i < len; i++) {
|
|
buf[i+1] = (uint8_t) string[i];
|
|
}
|
|
bool pz = options & PRECEDING_ZERO;
|
|
bool tz = options & TRAILING_ZERO;
|
|
addr_t result = find_data_raw(range, pz ? buf : buf + 1, len + tz + pz, pz ? 1 : 0, align, options, string);
|
|
return result;
|
|
}
|
|
|
|
addr_t find_bytes(range_t range, const char *bytes, size_t len, int align, int options) {
|
|
autofree int16_t *buf = malloc(sizeof(int16_t) * (len + 2));
|
|
for(unsigned int i = 0; i < len; i++) {
|
|
buf[i] = (uint8_t) bytes[i];
|
|
}
|
|
addr_t result = find_data_raw(range, buf, len, 0, align, options, "bytes");
|
|
return result;
|
|
}
|
|
addr_t find_int32(range_t range, uint32_t number, int options) {
|
|
prange_t pr = rangeconv(range, MUST_FIND);
|
|
char *start = pr.start;
|
|
char *end = pr.start + pr.size;
|
|
for(char *p = start; p + 4 <= end; p++) {
|
|
if(*((uint32_t *)p) == number) {
|
|
return p - start + range.start;
|
|
}
|
|
}
|
|
if(options & MUST_FIND) {
|
|
die("didn't find %08x in range", number);
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// search for push {..., lr}; add r7, sp, ...
|
|
// if is_thumb = 2, then search for both thumb and arm variants
|
|
addr_t find_bof(range_t range, addr_t eof, int is_thumb) {
|
|
addr_t start = eof & ~1;
|
|
if(start - range.start >= range.size) {
|
|
die("out of range: %llx", (uint64_t) eof);
|
|
}
|
|
|
|
uint8_t *p = rangeconv(range, MUST_FIND).start + (start - range.start);
|
|
addr_t addr = start;
|
|
if(addr & 1) { p--; addr--; }
|
|
for(p -= 8, addr -= 8; addr >= start - 0x1000 && addr >= range.start; p -= 2, addr -= 2) {
|
|
if(p[1] == 0xb5 && p[3] == 0xaf && is_thumb != 0) {
|
|
return addr | 1;
|
|
} else if(p[2] == 0x2d && p[3] == 0xe9 &&
|
|
p[6] == 0x8d && p[7] == 0xe2 &&
|
|
is_thumb != 1 && !(addr & 2)) {
|
|
return addr;
|
|
}
|
|
|
|
}
|
|
die("couldn't find the beginning of %08llx", (uint64_t) eof);
|
|
}
|
|
|
|
uint32_t resolve_ldr(const struct binary *binary, addr_t addr) {
|
|
uint32_t val = b_read32(binary, addr & ~1);
|
|
addr_t target;
|
|
if(addr & 1) {
|
|
addr_t base = ((addr + 3) & ~3);
|
|
if((val & 0xf800) == 0x4800) { // thumb
|
|
target = base + ((val & 0xff) * 4);
|
|
} else if((val & 0xffff) == 0xf8df) { // thumb-2
|
|
target = base + ((val & 0x0fff0000) >> 16);
|
|
} else {
|
|
die("weird thumb instruction %08x at %08llx", val, (uint64_t) addr);
|
|
}
|
|
} else {
|
|
addr_t base = addr + 8;
|
|
if((val & 0x0fff0000) == 0x59f0000) { // arm
|
|
target = base + (val & 0xfff);
|
|
} else {
|
|
die("weird ARM instruction %08x at %08llx", val, (uint64_t) addr);
|
|
}
|
|
}
|
|
return b_read32(binary, target);
|
|
}
|
|
|
|
addr_t find_bl(range_t *range) {
|
|
bool thumb = range->start & 1;
|
|
range->start &= ~1;
|
|
prange_t pr = rangeconv(*range, MUST_FIND);
|
|
uint32_t diff;
|
|
void *base;
|
|
if(thumb) {
|
|
uint16_t *p = pr.start;
|
|
while((uintptr_t)(p + 2) <= (uintptr_t)pr.start + pr.size) {
|
|
base = p;
|
|
uint16_t val = *p++;
|
|
if((val & 0xf800) == 0xf000) {
|
|
uint32_t imm10 = val & 0x3ff;
|
|
uint32_t S = ((val & 0x400) >> 10);
|
|
uint16_t val2 = *p++;
|
|
|
|
uint32_t J1 = ((val2 & 0x2000) >> 13);
|
|
uint32_t J2 = ((val2 & 0x800) >> 11);
|
|
uint32_t I1 = ~(J1 ^ S) & 1, I2 = ~(J2 ^ S) & 1;
|
|
uint32_t imm11 = val2 & 0x7ff;
|
|
diff = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
|
|
|
|
if((val2 & 0xd000) == 0xd000) {
|
|
// BL
|
|
diff |= 1;
|
|
goto ok;
|
|
} else if((val2 & 0xd000) == 0xc000) {
|
|
// BLX
|
|
goto ok;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
uint32_t *p = pr.start;
|
|
while((uintptr_t)(p + 1) <= (uintptr_t)pr.start + pr.size) {
|
|
base = p;
|
|
uint32_t val = *p++;
|
|
if((val & 0xfe000000) == 0xfa000000) {
|
|
// BL
|
|
diff = ((val & 0xffffff) << 2);
|
|
goto ok;
|
|
} else if((val & 0x0f000000) == 0x0b000000) {
|
|
// BLX
|
|
diff = ((val & 0x1000000) >> 23) | ((val & 0xffffff) << 2) | 1;
|
|
goto ok;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
ok:;
|
|
addr_t baseaddr = ((char *) base) - ((char *) pr.start) + range->start + 4;
|
|
range->start = baseaddr + thumb;
|
|
if(diff & 0x800000) diff |= 0xff000000;
|
|
return baseaddr + diff;
|
|
}
|
|
|
|
#define unparen(args...) args
|
|
#define find_anywhere_func(name, args1, args2) \
|
|
addr_t b_find_##name##_anywhere(const struct binary *binary, unparen args1, int options) { \
|
|
uint32_t end = binary->nsegments - 1; \
|
|
for(uint32_t i = 0; i <= end; i++) { \
|
|
range_t range = binary->segments[i].vm_range; \
|
|
addr_t result = find_##name(range, unparen args2, i == end ? options : options & ~MUST_FIND); \
|
|
if(result) return result; \
|
|
} \
|
|
return 0; /* won't reach */ \
|
|
}
|
|
|
|
find_anywhere_func(data, (const char *to_find, int align), (to_find, align))
|
|
find_anywhere_func(string, (const char *string, int align), (string, align))
|
|
find_anywhere_func(bytes, (const char *bytes, size_t len, int align), (bytes, len, align))
|
|
find_anywhere_func(int32, (uint32_t number), (number))
|
|
|
|
struct pattern {
|
|
int16_t buf[128];
|
|
ssize_t pattern_size, offset;
|
|
const char *name;
|
|
addr_t *result;
|
|
};
|
|
|
|
struct findmany {
|
|
range_t range;
|
|
int num_patterns;
|
|
struct pattern *patterns;
|
|
};
|
|
|
|
struct findmany *findmany_init(range_t range) {
|
|
struct findmany *fm = malloc(sizeof(*fm));
|
|
fm->range = range;
|
|
fm->num_patterns = 0;
|
|
fm->patterns = NULL;
|
|
return fm;
|
|
}
|
|
|
|
void findmany_add(addr_t *result, struct findmany *fm, const char *to_find) {
|
|
if(fm->num_patterns >= 32) {
|
|
die("too many patterns");
|
|
}
|
|
fm->num_patterns++;
|
|
fm->patterns = realloc(fm->patterns, sizeof(struct pattern) * fm->num_patterns);
|
|
struct pattern *pat = &fm->patterns[fm->num_patterns - 1];
|
|
|
|
parse_pattern(to_find, pat->buf, &pat->pattern_size, &pat->offset);
|
|
pat->name = to_find;
|
|
pat->result = result;
|
|
*result = 0;
|
|
}
|
|
|
|
struct node {
|
|
uint16_t next[16];
|
|
uint32_t terminates;
|
|
};
|
|
|
|
struct node2 {
|
|
uint16_t next[16];
|
|
};
|
|
|
|
struct findmany2 {
|
|
struct node *nodes;
|
|
uint8_t *index_paths;
|
|
uint16_t node_count;
|
|
struct node2 *nodes2;
|
|
uint16_t node2_count;
|
|
};
|
|
|
|
static inline int find_or_create(void **restrict array, void *restrict cmp, uint16_t *restrict num_items, int item_size, uint16_t *restrict node) {
|
|
char *p = *array;
|
|
for(int j = 0; j < *num_items; j++) {
|
|
if(!memcmp(p, cmp, item_size)) {
|
|
*node = j;
|
|
return false;
|
|
}
|
|
p += item_size;
|
|
}
|
|
if(*num_items == 65535) {
|
|
die("welp");
|
|
}
|
|
*node = *num_items;
|
|
*array = realloc(*array, ++*num_items * item_size);
|
|
memcpy(*array + *node * item_size, cmp, item_size);
|
|
return true;
|
|
}
|
|
|
|
static uint16_t findmany_recurse(const struct findmany *restrict fm, struct findmany2 *restrict fm2, uint8_t *restrict cur_index_path) {
|
|
// this part is inefficient
|
|
uint16_t node;
|
|
if(!find_or_create((void **) &fm2->index_paths, cur_index_path, &fm2->node_count, fm->num_patterns, &node)) {
|
|
// it found an existing node
|
|
return node;
|
|
}
|
|
/*
|
|
printf("findmany_recurse: created node %d with cur_index_path = ", node);
|
|
for(int p = 0; p < fm->num_patterns; p++) {
|
|
printf(" %d", (int) cur_index_path[p]);
|
|
}
|
|
printf("\n");
|
|
*/
|
|
memcpy(fm2->index_paths + node * fm->num_patterns, cur_index_path, fm->num_patterns);
|
|
|
|
fm2->nodes = realloc(fm2->nodes, fm2->node_count * sizeof(struct node));
|
|
struct node *nodep = &fm2->nodes[node];
|
|
|
|
nodep->terminates = 0;
|
|
for(int p = 0; p < fm->num_patterns; p++) {
|
|
if(cur_index_path[p] == fm->patterns[p].pattern_size) {
|
|
nodep->terminates |= (1 << p);
|
|
cur_index_path[p] = 0;
|
|
}
|
|
}
|
|
|
|
autofree uint8_t *new_index_path = malloc(fm->num_patterns);
|
|
for(uint8_t hi = 0; hi < 16; hi++) {
|
|
struct node2 n2;
|
|
for(uint8_t lo = 0; lo < 16; lo++) {
|
|
uint8_t chr = hi * 16 + lo;
|
|
|
|
uint8_t orr = 0; // hack - a lot will point to 0
|
|
|
|
for(int p = 0; p < fm->num_patterns; p++) {
|
|
uint8_t idx = cur_index_path[p];
|
|
int16_t comp = fm->patterns[p].buf[idx];
|
|
if(comp == -1 || comp == chr) {
|
|
idx++;
|
|
} else {
|
|
idx = 0;
|
|
}
|
|
new_index_path[p] = idx;
|
|
orr |= idx;
|
|
}
|
|
|
|
n2.next[lo] = orr ? findmany_recurse(fm, fm2, new_index_path) : 0;
|
|
}
|
|
nodep = &fm2->nodes[node];
|
|
find_or_create((void **) &fm2->nodes2, &n2, &fm2->node2_count, sizeof(struct node2), &nodep->next[hi]);
|
|
}
|
|
|
|
//printf("returning node %d\n", node);
|
|
return node;
|
|
}
|
|
|
|
void findmany_go(struct findmany *fm) {
|
|
struct findmany2 fm2;
|
|
memset(&fm2, 0, sizeof(fm2));
|
|
autofree uint8_t *cur_index_path = calloc(1, fm->num_patterns);
|
|
#ifdef PROFILING
|
|
clock_t a = clock();
|
|
#endif
|
|
findmany_recurse(fm, &fm2, cur_index_path);
|
|
#ifdef PROFILING
|
|
clock_t b = clock();
|
|
printf("it took %d clocks to prepare the DFA\n", (int) (b - a));
|
|
#endif
|
|
|
|
prange_t pr = rangeconv(fm->range, MUST_FIND);
|
|
uint8_t *start = pr.start;
|
|
struct node *cur = &fm2.nodes[0];
|
|
for(uint8_t *ptr = start; ptr < start + pr.size; ptr++) {
|
|
uint8_t chr = *ptr;
|
|
|
|
cur = &fm2.nodes[fm2.nodes2[cur->next[chr / 16]].next[chr % 16]];
|
|
if(cur->terminates) {
|
|
for(int p = 0, bit = 1; p < fm->num_patterns; p++, bit <<= 1) {
|
|
if(cur->terminates & bit) {
|
|
struct pattern *pat = &fm->patterns[p];
|
|
addr_t result = ptr - pat->pattern_size - start + fm->range.start + 1;
|
|
if(*pat->result) {
|
|
die("found [%s] multiple times in range: first at %08llx then at %08llx", pat->name, (uint64_t) *pat->result, (uint64_t) result);
|
|
}
|
|
*pat->result = result + pat->offset;
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
free(fm2.index_paths); // could be done earlier
|
|
free(fm2.nodes);
|
|
free(fm2.nodes2);
|
|
|
|
for(int p = 0; p < fm->num_patterns; p++) {
|
|
struct pattern *pat = &fm->patterns[p];
|
|
if(!*pat->result) {
|
|
die("didn't find [%s] in range(%llx, %zx)", pat->name, (uint64_t) fm->range.start, fm->range.size);
|
|
}
|
|
}
|
|
|
|
free(fm->patterns);
|
|
free(fm);
|
|
}
|