Examples | Finding all call instructions that reference a function table by register - arizvisa/ida-minsc GitHub Wiki

Check that all of our references to the data aren't anything too crazy

# Get the address of the table and look at every address that references it.
table = db.address('gpf_libIdaTable_3656c0')
for ref in db.up(table):

    # anything that is not code is reported, since only instructions are expected here
    if not database.type.code(ref):
        print(db.disasm(ref), 'is not referencing an instruction')
    # any instruction other than a "lea" is reported as well
    elif ins.mnemonic(ref) != 'lea':
        print(db.disasm(ref), 'is unexpected')

Iterate through all references finding the next call or branch instruction using a register

table = db.address('gpf', 'libIdaTable', 0x3656c0)


def while_not_sentinel(ea):
    '''Return true while `ea` is not a CF_STOP instruction (unconditional branch, return, hlt, etc.).'''
    return not ins.type.sentinel(ea)


for ea in db.x.up(table):

    # out of the operands that this instruction writes to, keep only the registers
    regs_written_to = [op for op in ins.ops_write(ea) if isinstance(op, register_t)]

    # find the next instruction that reads or executes something using one of them
    target = db.nextreg(ea, while_not_sentinel, *regs_written_to, read=True, execute=True)

    # a single written register is expected; the unpacking raises for any other count
    reg, = regs_written_to

    # tag the target with the register name and the start address, so that
    # there is a place to start from if a correction is needed later.
    db.tag(target, 'libIdaTable.call', (reg.name, ea))

Check our results to see which ones landed on an unconditional branch and resume our search

tag = 'libIdaTable.call'

# Go through everything that db.selectcontents returns for the tag, select the
# tagged addresses for each entry, and save the ones that are not a call.
results = []
for f, _ in db.selectcontents(tag):
    for ea, res in func.select(f, tag):

        # a call is what we wanted to land on, so there is nothing to save
        if ins.type.call(ea):
            continue

        # the tag was stored as a (register, start address) pair; only the
        # register is kept together with the address we landed on.
        reg, _start = res[tag]
        results.append((ea, reg))

# show the disassembly for each address that was collected
addresses = map(first, results)
print('\n'.join(map(db.disasm, addresses)))

# keep only the instructions that don't use our register whatsoever
results = [(address, register) for address, register in results if not ins.ops_register(address, register)]


def while_not_sentinel(ea):
    '''Return true while `ea` is not a sentinel instruction.'''
    return not ins.type.sentinel(ea)


for ea, reg in results:

    # the first operand of the branch that we landed on is what it points to
    branch_tgt = ins.op(ea, 0)

    # starting at the branch target, find the next time the register is read from or executed
    target = db.nextreg(branch_tgt, while_not_sentinel, reg, read=True, execute=True)

    # remove the tag from the old location, and then apply its value to the new one
    previous = db.tag(ea, 'libIdaTable.call', None)
    db.tag(target, 'libIdaTable.call', previous)

Check to see what we've missed so that we can correct the rest of them manually


def selectall(*tags):
    '''Yield each (address, result) pair selected by `tags` from everything that `db.selectcontents` returns for them.'''
    for owner, _ in db.selectcontents(*tags):

        # select the same tags again, this time within the contents of the owner
        for address, result in func.select(owner, *tags):
            yield address, result

# go back through everything that has been tagged so far
missed, not_branch = [], []
for ea, tags in selectall('libIdaTable.call'):

    # only the register from the (register, start address) pair is needed here
    reg, _ = tags['libIdaTable.call']

    # instructions that don't actually use our register were missed
    if not ins.ops_register(ea, reg):
        missed.append(ea)

    # instructions that are neither a call nor a branch (might be loaded into another reg)
    if not (ins.type.call(ea) or ins.type.branch(ea)):
        not_branch.append(ea)

# show where each search started, the register, and the instruction that it ended on
for landed in missed:
    reg, start = db.tag(landed, 'libIdaTable.call')
    print(db.disasm(start), reg, db.disasm(landed))

    # maybe we can bruteforce finding the register being used to execute something
    for opref in func.register(start, reg, execute=True):
        print(hex(start), reg, db.disasm(opref))

# show the non-branches where none of the written operands is a register (probably a local)
for ea in not_branch:
    if not any(isinstance(op, register_t) for op in ins.ops_write(ea)):
        print(db.disasm(ea))

Give up and use Hex-Rays since there are still too many results

import hexrays
table = db.address('gpf_libIdaTable_3656c0')
symbolname = db.name(table)

# for each address, collect the address of every ctree match that looks like
# "(symbol[index])(...)"; the matched items themselves are not kept.
candidates = {}
for f in not_branch:
    matches = hexrays.match(f, '({:s}[_])(_);'.format(symbolname))
    candidates[f] = {hexrays.address(item) for item in matches}

# we could check them by pairing each address with its candidates, and then
# showing the disassembly of both next to each other
everything = [[(ea, item) for item in items] for ea, items in candidates.items()]
rows = ("{:s}\t\t{:s}".format(db.disasm(ea, comments=True), db.disasm(item)) for ea, item in itertools.chain.from_iterable(everything))
print('\n'.join(rows))

# but we don't care anymore, and so we move the tag blindly
for ea, items in candidates.items():

    # if nothing was matched, there is nowhere to move the tag to. leave it at
    # its current location instead of removing it and losing its value.
    if not items:
        continue

    # remove the tag from the old location while keeping the value it had
    tag = db.tag(ea, 'libIdaTable.call', None)

    # apply that value to every matched call. the destination has to be the
    # candidate address, as tagging `ea` again would just put the tag right
    # back where it came from and nothing would have been moved.
    for call in items:
        db.tag(call, 'libIdaTable.call', tag)