idapython学习
常见函数功能:
通过utools可进行idapython函数功能查询,这边简单将一些常用的函数列举一下:
XrefsTo(ea, flags=0) #查看交叉引用
get_func_attr(ea, attr) #获取函数属性信息(具体attr属性可以网上查询)
get_cmt(ea, rptble) #获取注释
set_cmt(ea, comm, rptble) #添加注释
prev_head(ea, minea) #获得当前地址指向指令的前一条指令的地址
next_head(ea, maxea) #获得当前地址指向指令的后一条指令的地址
generate_disasm_line(ea, flags=0) #得到ea地址处的反汇编语句
print_operand(ea, n, getn_flags=0, newtype=None) #获得当前指令的第n+1个操作数(字符串返回)
get_operand_value(ea, n) #获得当前指令的第n+1个操作数
GetDisasm(ea) #得到ea地址对应的汇编语句
set_name(ea, name, flags=0) #修改变量名
patch_byte(ea, x) #将ea地址处内容patch为x
print_insn_mnem(ea) -> str #输出ea地址处的操作指令
get_item_size(ea) -> asize_t #获取ea地址处指令长度
实际练习:
1、SUSCTF tttree
文件被混淆工具处理,每个代码块有一条有用语句,并通过栈结构指令以及retn来进行控制流变换
例如下面这个基本块:
在call $+5后栈布局如下:
之后再执行pop rax指令,rax值即为pop rax指令所在地址;再将rax+0xFFFFFFFFFFFEE6E7后存入[rsp+0x10]栈空间中,即最下面这个rax,此时栈布局如下图所示:
再执行popfq、pop rax指令,最后再执行retn(相当于pop rip指令),rip被更改为call $+5后面一条指令的地址+0xFFFFFFFFFFFEE6E7,由此来实现控制流的转换
可以发现所有基本块中,包含此类call $+5的语句不超过两条,接下来通过idapython脚本来实现提取有效指令
首先通过下面的脚本来提取文件中所有代码块的起始、终止地址:
def get_blocks(start_ea, end_ea):
    """Collect ``(start, end)`` address pairs for the code blocks in a range.

    Two passes are made over ``[start_ea, end_ea)``:

    1. A linear sweep that advances item by item and closes a block right
       after every ``retn`` instruction.
    2. A search for the byte pattern ``E8 00 00 00 00 58`` (``call $+5``
       followed by ``pop rax``).  Operand 1 of the instruction located
       6 bytes after the match is added to ``match + 5`` (the address of the
       ``pop rax``) and truncated to 64 bits; the result is treated as the
       start of another block, which runs up to the first ``retn`` found
       before ``end_ea``.

    Args:
        start_ea: First address to scan.
        end_ea: Address at which scanning stops (exclusive).

    Returns:
        A list of ``(start, end)`` tuples; ``end`` is the address just past
        the block's ``retn``.
    """
    blocks = []
    # Seed with start_ea so a stub that jumps back to the very first block
    # does not record that block a second time.
    vis = {start_ea}

    # Pass 1: linear sweep, splitting at every retn.
    start = end = start_ea
    while end < end_ea:
        is_ret = idc.print_insn_mnem(end) == 'retn'
        end += idc.get_item_size(end)
        if is_ret:
            blocks.append((start, end))
            start = end
            vis.add(start)

    # Pass 2: follow the computed-jump stubs to block starts that the
    # linear sweep did not begin a block at.
    ea = start_ea
    pattern = 'E8 00 00 00 00 58'
    while ea < end_ea:
        ea = idc.find_binary(ea, ida_search.SEARCH_DOWN, pattern)
        if ea == ida_idaapi.BADADDR:
            break
        value = idc.get_operand_value(ea + 6, 1)
        start = end = (ea + 5 + value) & 0xFFFFFFFFFFFFFFFF
        if start not in vis:
            vis.add(start)
            while end < end_ea:
                is_ret = idc.print_insn_mnem(end) == 'retn'
                end += idc.get_item_size(end)
                if is_ret:
                    blocks.append((start, end))
                    break
        # Step past the matched stub so the next search makes progress.
        ea += 6
    return blocks
得到各个代码块的起始、终止地址后,需要再提取代码块中的指令,由于可能存在混淆,所以通过下面脚本来进行指令提取
代码如下:
def get_asms(start, end):
    """Return the simplified disassembly text of the items in ``[start, end)``.

    Each item is rendered with ``idc.generate_disasm_line(ea, 0)`` and cut at
    the first ``';'`` so that anything after it (IDA's trailing comment) is
    dropped.  The collected lines are then handed to ``simplify_asms``.

    Args:
        start: Address of the first item.
        end: Address at which to stop (exclusive).

    Returns:
        The ``';'``-joined string produced by ``simplify_asms``.
    """
    asms = []
    ea = start
    while ea < end:
        line = idc.generate_disasm_line(ea, 0)
        asms.append(line.split(';')[0])
        ea += idc.get_item_size(ea)
    return simplify_asms(asms)
def simplify_asms(asms):
    """Drop back-to-back ``push X`` / ``pop X`` pairs and join the rest.

    The lines are processed with a stack: when a line starting with ``pop``
    follows a kept line starting with ``push`` and the text after the two
    mnemonics is equal (ignoring surrounding whitespace), both lines are
    removed.  Because the comparison is against the last *kept* line, nested
    pairs such as ``push a; push b; pop b; pop a`` cancel completely.

    Args:
        asms: Sequence of instruction strings.

    Returns:
        The surviving instructions joined with ``';'``.
    """
    left_asms = []
    for asm in asms:
        # Parenthesised so the condition may span several lines.
        cancels_previous = (
            asm.startswith("pop")
            and len(left_asms) != 0
            and left_asms[-1].startswith("push")
            and left_asms[-1][4:].strip() == asm[3:].strip()
        )
        if cancels_previous:
            left_asms.pop()
        else:
            left_asms.append(asm)
    return ';'.join(left_asms)
再利用keystone进行代码的提取,定义基本块的属性如下:
• start_addr:代码块的起始地址;• txt:代码块的代码;• direct_next:执行完此代码块后接下来要执行的地址;• branch_next:代码块中的条件跳转语句跳到的地址;• call_target:代码块调用的函数地址
但是在进行代码重写的时候,keystone格式与ida格式不兼容,所以还需要对提取出来的指令进行适当修改
具体细节见下列代码:
def fit_ks(txt):
    """Rewrite ``';'``-joined IDA disassembly text for the keystone assembler.

    Per instruction:

    * ``nop`` and anything containing ``align`` is dropped.
    * Anything containing ``retn`` becomes ``ret``.
    * A jump (text starts with ``j``) that mentions ``loc_`` is kept verbatim;
      the caller resolves it later.
    * Otherwise the instruction is split on single spaces and every word is
      rewritten:

      - A word starting with ``cs:`` gets ``<type> ptr`` inserted in front of
        it when it contains one of the prefixes in ``type_list`` (the prefix
        without its trailing underscore is used as the type).  The part up to
        the first comma is then replaced by ``[0x<addr>]`` if IDA resolves it
        as a name, or wrapped in ``[...]`` if it does not.
      - Any other word is split on commas and each piece that IDA resolves as
        a name is replaced by ``[0x<addr>]``.

    Args:
        txt: Instructions separated by ``';'``.

    Returns:
        The rewritten instructions, again separated by ``';'``.
    """
    # Value idc.get_name_ea_simple() is compared against for "no such name".
    bad_addr = 0xFFFFFFFFFFFFFFFF
    new_txt_list = []
    for asm in txt.split(';'):
        if asm == "nop" or 'align' in asm:
            continue
        if 'retn' in asm:
            new_txt_list.append('ret')
            continue
        if asm.startswith('j') and 'loc_' in asm:
            new_txt_list.append(asm)
            continue
        new_asm_list = []
        for word in asm.split(' '):
            if word.startswith('cs:'):
                # NOTE(review): `word` is a single space-free token, so this
                # test looks like it is always true - confirm whether the
                # whole instruction was meant to be checked instead.
                if 'ptr' not in word:
                    for typ in type_list:
                        if typ in word:
                            new_asm_list += [typ[:-1], 'ptr']
                            break
                targets = word[3:].split(',')
                addr = idc.get_name_ea_simple(targets[0])
                if addr == bad_addr:
                    assert not targets[0].startswith('[')
                    targets[0] = '[' + targets[0] + ']'
                else:
                    targets[0] = '[0x%x]' % addr
                new_asm_list.append(','.join(targets))
            else:
                new_targets = []
                for target in word.split(','):
                    addr = idc.get_name_ea_simple(target)
                    if addr == bad_addr:
                        new_targets.append(target)
                    else:
                        new_targets.append('[0x%x]' % addr)
                new_asm_list.append(','.join(new_targets))
        new_txt_list.append(' '.join(new_asm_list))
    return ';'.join(new_txt_list)
之后,再通过bfs来进行代码块的重组,将所有复用的代码块优先标记为nop,之后再改为jmp或者call addr_map
完整代码如下:
from idc import *
from queue import Queue
from idaapi import *
from keystone import *
# Name prefixes that fit_ks() looks for inside "cs:"-prefixed operands.
type_list = ['dword_', 'asc_', 'byte_', 'unk_', 'qword_']
# Address range handed to get_blocks() when the script runs.
start_ea = 0x140001000
end_ea = 0x14001C695
# Keystone assembler instance for x86 in 64-bit mode.
ks = Ks(KS_ARCH_X86, KS_MODE_64)
class Block():
    """One de-obfuscated code block and its outgoing edges."""

    def __init__(self, start_addr, txt, direct_next=None, call_target=None, branch_next=None):
        """Store the block; ``txt`` is converted with ``fit_ks`` on the way in.

        Args:
            start_addr: Address at which the block starts.
            txt: ``';'``-joined instruction text of the block.
            direct_next: Start address of the block executed next, or None.
            call_target: Start address of the function the block calls, or None.
            branch_next: Target address of the block's conditional jump, or None.
        """
        self.start_addr = start_addr
        self.txt = fit_ks(txt)
        self.direct_next = direct_next
        self.branch_next = branch_next
        self.call_target = call_target
def simplify_asms(asms):
    """Drop back-to-back ``push X`` / ``pop X`` pairs and join the rest.

    The lines are processed with a stack: when a line starting with ``pop``
    follows a kept line starting with ``push`` and the text after the two
    mnemonics is equal (ignoring surrounding whitespace), both lines are
    removed.  Because the comparison is against the last *kept* line, nested
    pairs such as ``push a; push b; pop b; pop a`` cancel completely.

    Args:
        asms: Sequence of instruction strings.

    Returns:
        The surviving instructions joined with ``';'``.
    """
    left_asms = []
    for asm in asms:
        # Parenthesised so the condition may span several lines.
        cancels_previous = (
            asm.startswith("pop")
            and len(left_asms) != 0
            and left_asms[-1].startswith("push")
            and left_asms[-1][4:].strip() == asm[3:].strip()
        )
        if cancels_previous:
            left_asms.pop()
        else:
            left_asms.append(asm)
    return ';'.join(left_asms)
def get_asms(start, end):
    """Return the simplified disassembly text of the items in ``[start, end)``.

    Each item is rendered with ``idc.generate_disasm_line(ea, 0)`` and cut at
    the first ``';'`` so that anything after it (IDA's trailing comment) is
    dropped.  The collected lines are then handed to ``simplify_asms``.

    Args:
        start: Address of the first item.
        end: Address at which to stop (exclusive).

    Returns:
        The ``';'``-joined string produced by ``simplify_asms``.
    """
    asms = []
    ea = start
    while ea < end:
        line = idc.generate_disasm_line(ea, 0)
        asms.append(line.split(';')[0])
        ea += idc.get_item_size(ea)
    return simplify_asms(asms)
def get_blocks(start_ea, end_ea):
    """Collect ``(start, end)`` address pairs for the code blocks in a range.

    Two passes are made over ``[start_ea, end_ea)``:

    1. A linear sweep that advances item by item and closes a block right
       after every ``retn`` instruction.
    2. A search for the byte pattern ``E8 00 00 00 00 58`` (``call $+5``
       followed by ``pop rax``).  Operand 1 of the instruction located
       6 bytes after the match is added to ``match + 5`` (the address of the
       ``pop rax``) and truncated to 64 bits; the result is treated as the
       start of another block, which runs up to the first ``retn`` found
       before ``end_ea``.

    Args:
        start_ea: First address to scan.
        end_ea: Address at which scanning stops (exclusive).

    Returns:
        A list of ``(start, end)`` tuples; ``end`` is the address just past
        the block's ``retn``.
    """
    blocks = []
    # Seed with start_ea so a stub that jumps back to the very first block
    # does not record that block a second time.
    vis = {start_ea}

    # Pass 1: linear sweep, splitting at every retn.
    start = end = start_ea
    while end < end_ea:
        is_ret = idc.print_insn_mnem(end) == 'retn'
        end += idc.get_item_size(end)
        if is_ret:
            blocks.append((start, end))
            start = end
            vis.add(start)

    # Pass 2: follow the computed-jump stubs to block starts that the
    # linear sweep did not begin a block at.
    ea = start_ea
    pattern = 'E8 00 00 00 00 58'
    while ea < end_ea:
        ea = idc.find_binary(ea, ida_search.SEARCH_DOWN, pattern)
        if ea == ida_idaapi.BADADDR:
            break
        value = idc.get_operand_value(ea + 6, 1)
        start = end = (ea + 5 + value) & 0xFFFFFFFFFFFFFFFF
        if start not in vis:
            vis.add(start)
            while end < end_ea:
                is_ret = idc.print_insn_mnem(end) == 'retn'
                end += idc.get_item_size(end)
                if is_ret:
                    blocks.append((start, end))
                    break
        # Step past the matched stub so the next search makes progress.
        ea += 6
    return blocks
def calc_target(block_start, data):
    """Compute the jump target of every ``call $+5; pop rax`` stub in a block.

    For each occurrence of the bytes ``E8 00 00 00 00 58`` in ``data`` the
    function reads operand 1 of the instruction 6 bytes after the match and
    adds it to the address of the ``pop rax`` (match + 5), truncated to
    64 bits.

    Args:
        block_start: Address that ``data[0]`` was read from.
        data: Raw bytes of the block; ``None`` or empty yields no targets.

    Returns:
        A list of target addresses in the order the stubs appear.
    """
    # The escapes matter: without the backslashes the literal is plain ASCII
    # text and never matches machine code.
    stub = b'\xE8\x00\x00\x00\x00\x58'
    target_list = []
    if not data:
        return target_list
    idx = data.find(stub)
    while idx != -1:
        value = idc.get_operand_value(block_start + idx + 6, 1)
        target = (block_start + idx + 5 + value) & 0xFFFFFFFFFFFFFFFF
        target_list.append(target)
        idx = data.find(stub, idx + 6)
    return target_list
def fit_ks(txt):
    """Rewrite ``';'``-joined IDA disassembly text for the keystone assembler.

    Per instruction:

    * ``nop`` and anything containing ``align`` is dropped.
    * Anything containing ``retn`` becomes ``ret``.
    * A jump (text starts with ``j``) that mentions ``loc_`` is kept verbatim;
      the caller resolves it later.
    * Otherwise the instruction is split on single spaces and every word is
      rewritten:

      - A word starting with ``cs:`` gets ``<type> ptr`` inserted in front of
        it when it contains one of the prefixes in ``type_list`` (the prefix
        without its trailing underscore is used as the type).  The part up to
        the first comma is then replaced by ``[0x<addr>]`` if IDA resolves it
        as a name, or wrapped in ``[...]`` if it does not.
      - Any other word is split on commas and each piece that IDA resolves as
        a name is replaced by ``[0x<addr>]``.

    Args:
        txt: Instructions separated by ``';'``.

    Returns:
        The rewritten instructions, again separated by ``';'``.
    """
    # Value idc.get_name_ea_simple() is compared against for "no such name".
    bad_addr = 0xFFFFFFFFFFFFFFFF
    new_txt_list = []
    for asm in txt.split(';'):
        if asm == "nop" or 'align' in asm:
            continue
        if 'retn' in asm:
            new_txt_list.append('ret')
            continue
        if asm.startswith('j') and 'loc_' in asm:
            new_txt_list.append(asm)
            continue
        new_asm_list = []
        for word in asm.split(' '):
            if word.startswith('cs:'):
                # NOTE(review): `word` is a single space-free token, so this
                # test looks like it is always true - confirm whether the
                # whole instruction was meant to be checked instead.
                if 'ptr' not in word:
                    for typ in type_list:
                        if typ in word:
                            new_asm_list += [typ[:-1], 'ptr']
                            break
                targets = word[3:].split(',')
                addr = idc.get_name_ea_simple(targets[0])
                if addr == bad_addr:
                    assert not targets[0].startswith('[')
                    targets[0] = '[' + targets[0] + ']'
                else:
                    targets[0] = '[0x%x]' % addr
                new_asm_list.append(','.join(targets))
            else:
                new_targets = []
                for target in word.split(','):
                    addr = idc.get_name_ea_simple(target)
                    if addr == bad_addr:
                        new_targets.append(target)
                    else:
                        new_targets.append('[0x%x]' % addr)
                new_asm_list.append(','.join(new_targets))
        new_txt_list.append(' '.join(new_asm_list))
    return ';'.join(new_txt_list)
if __name__ == '__main__':
    # ---- Stage 1: turn every raw code block into a Block record ----------
    blocks = get_blocks(start_ea, end_ea)
    block_dict = {}
    for block_start, block_end in blocks:
        # NOTE(review): this block is skipped by address; the reason is not
        # visible in this script.
        if block_start in [0x1400042d3]:
            continue
        data = idc.get_bytes(block_start, block_end - block_start)
        target_list = calc_target(block_start, data)
        assert len(target_list) <= 2
        if target_list:
            # 50 50 9C = push rax; push rax; pushfq.  Only the instructions
            # in front of this sequence are kept as the block's text.
            idx = data.index(b'\x50\x50\x9c')
            txt = get_asms(block_start, block_start + idx)
            if len(txt) == 0:
                txt = 'nop'
            if len(target_list) == 2:
                # Two stubs: the first target is where execution continues,
                # the second is recorded as the called function.
                if txt == 'nop':
                    txt = 'call fun_%x' % target_list[1]
                else:
                    txt += ';call fun_%x' % target_list[1]
                block_dict[block_start] = Block(block_start, txt, target_list[0], call_target=target_list[1])
            else:
                if 'short' in txt:
                    # A short jump inside the block: remember its loc_ target
                    # and drop the "short" keyword from the text.
                    branch_target = int(txt.split('short')[1].split(';')[0].strip().split('loc_')[1], 16)
                    txt = txt.replace('short', '')
                    block_dict[block_start] = Block(block_start, txt, target_list[0], branch_next=branch_target)
                else:
                    block_dict[block_start] = Block(block_start, txt, target_list[0])
        else:
            # No stub in the block: keep its whole disassembly.
            txt = get_asms(block_start, block_end)
            block_dict[block_start] = Block(block_start, txt)

    # ---- Stage 2: BFS over functions and branches, laying out new code ---
    new_start_ea = 0x140010000          # where the rebuilt code is written
    new_addr = new_start_ea
    new_data = bytearray()
    fun_que = Queue()
    vis_fun = set()
    addr_map = {}                       # original block start -> new address
    todo_patches = []                   # (new address, asm text) fixed up later
    fun_que.put(0x1400133B7)            # function the traversal starts from
    vis_fun.add(0x1400133B7)
    while not fun_que.empty():
        fun_addr = fun_que.get()
        branch_que = Queue()
        vis_branchs = set()
        vis_blocks = set()
        branch_que.put(fun_addr)
        vis_branchs.add(fun_addr)
        while not branch_que.empty():
            block = block_dict[branch_que.get()]
            while True:
                vis_blocks.add(block.start_addr)
                addr_map[block.start_addr] = new_addr
                if block.call_target is not None and block.call_target not in vis_fun:
                    vis_fun.add(block.call_target)
                    fun_que.put(block.call_target)
                if block.branch_next is not None and block.branch_next not in vis_branchs:
                    vis_branchs.add(block.branch_next)
                    branch_que.put(block.branch_next)
                if block.txt.startswith('j') or block.txt.startswith('call fun_'):
                    if block.txt.startswith('j') and '[' in block.txt:
                        # fit_ks turned the target into "[0x...]"; bring it
                        # back to the "loc_..." form the fix-up pass parses.
                        block.txt = block.txt.replace('[', 'loc_')
                        block.txt = block.txt.replace(']', '')
                    # The final target address is not known yet: reserve
                    # 6 bytes of NOP and assemble the instruction later.
                    todo_patches.append((new_addr, block.txt))
                    new_data += b'\x90' * 6
                    new_addr += 6
                elif len(block.txt) != 0:
                    encoding, count = ks.asm(block.txt, addr=new_addr)
                    new_data += bytes(encoding)
                    new_addr += len(encoding)
                if block.direct_next is not None:
                    block = block_dict[block.direct_next]
                    if block.start_addr in vis_blocks:
                        # Successor was already laid out in this function:
                        # reserve 5 bytes for a jmp back to it.
                        todo_patches.append((new_addr, 'jmp loc_%X' % block.start_addr))
                        new_data += b'\x90' * 5
                        new_addr += 5
                        break
                else:
                    break

    # ---- Stage 3: write the new code, then fix up jumps and calls --------
    for offset, byte in enumerate(new_data):
        patch_byte(new_start_ea + offset, byte)
    for addr, asm in todo_patches:
        if asm.startswith('call fun_'):
            fun_addr = int(asm.split('fun_')[1], 16)
            real_addr = addr_map[fun_addr]
            asm = 'call 0x%x' % real_addr
        elif asm.startswith('j') and 'loc_' in asm:
            loc_addr = int(asm.split('loc_')[1].split(';')[0], 16)
            # Targets that were never relocated keep their original address.
            if loc_addr in addr_map:
                real_addr = addr_map[loc_addr]
            else:
                real_addr = loc_addr
            asm = '%s 0x%x' % (asm.split(' ')[0], real_addr)
        encoding, count = ks.asm(asm, addr=addr)
        data = bytes(encoding)
        for offset, byte in enumerate(data):
            patch_byte(addr + offset, byte)
    # Undefine the rewritten range item by item.
    for offset in range(len(new_data)):
        del_items(new_start_ea + offset)
    print('finished')
在ida窗口载入该脚本后,以0x140010000作为起始地址,写入新的指令集,反汇编如下:
main函数如下:
题目提示是tree,从网上找到一份tree的c代码,将关键函数恢复符号,还原得到关键代码如下:
根据tree的性质写出求逆脚本(根据树和堆的特性,遍历tree递归求解):
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
// Table read by solve() when it computes right_idx.
int data1[32] = { 0xA8, 0x131, 0x113, 0x47, 0x9E, 0x3B, 0x3A, 0xBF,
0x92, 0xF0, 0x174, 0xC3, 0x289, 0x104, 0x260, 0x4D,
0x2FB, 0x9E, 0x191, 0x158, 0x7D, 0x4A, 0x1E9, 0x101,
0xD0, 0xFC, 0x70, 0x11F, 0x345, 0x162, 0x2A4, 0x92 };
// Table read by solve() when it computes left_idx.
int data2[32] = { 0xAC, 0xFD, 0x247, 0x115, 0xD4, 0x2B5, 0x1FC, 0x28B,
0x14A, 0x4C, 0x8E, 0xE9, 0x55, 0x12C, 0xF5, 0xE3,
0x81, 0x2E2, 0x1A8, 0x117, 0x152, 0x101, 0x3A, 0x1D0,
0xA8, 0xCC, 0x149, 0x137, 0x300, 0x1EC, 0x276, 0x247 };
// Table that solve() subtracts add_data from to recover a key value;
// main() also sorts a copy of it to fill idx[].
int data3[32] = { 0xA2, 0xAF, 0x9D, 0xB7, 0xD2, 0xCB, 0xC7, 0xC6, 0xB0,
0xD5, 0xDA, 0xE3, 0xE6, 0xE8, 0xE9, 0xF3, 0xF4, 0xEF,
0xEE, 0xF7, 0xF9, 0xFF, 0x101, 0xF5, 0x109, 0x11F,
0x11A, 0x146, 0x124, 0x10F, 0x106, 0xDF };
// Current state of the generator advanced by getrand().
int rand_seed = 0x12B1420;
// Advance the global generator and return its new state:
//   rand_seed = 48271 * rand_seed mod 0x7FFFFFFF
// The multiplier carries the standard `LL` suffix (instead of the
// compiler-specific `i64`) so the product is formed in 64-bit arithmetic
// before the reduction.
int getrand()
{
    return rand_seed = 48271LL * rand_seed % 0x7FFFFFFF;
}
// Recovered values; main() prints each entry with "%c".
int key[32];
// idx[i]: position of data3[i] in the sorted copy of data3 (filled in main()).
int idx[32];
// Second batch of 32 getrand() results; main() starts solve() at the index
// of the smallest entry.
int rand_data[32];
// add_data[i] = i + getrand() % 107 + 97, filled in main() from the first
// 32 getrand() results.
int add_data[32];
// Recursively recover key[] starting from node index p.
//
//   p        index of the entry of key[] / add_data[] being recovered
//   l1, r1   inclusive index range into data1/data2/data3; the entry at r1
//            belongs to the current node
//   l2, r2   inclusive range of idx[] ranks covered by the current node
//
// key[p] is data3[r1] minus add_data[p].  The indices of the two children are
// then derived from data2[r1] / data1[r1] and the sub-ranges are split around
// idx[r1].
// NOTE(review): the range arithmetic looks like rebuilding a tree from a
// post-order sequence plus sorted ranks - confirm against the binary.
void solve(int p, int l1, int r1, int l2, int r2)
{
    key[p] = data3[r1] - add_data[p];
    if (idx[r1] + 1 <= r2)
    {
        // Entries ranked above idx[r1] sit directly in front of r1.
        int left_idx = (data2[r1] - key[p]) / 23 - 1;
        solve(left_idx, r1 - r2 + idx[r1], r1 - 1, idx[r1] + 1, r2);
    }
    if (idx[r1] - 1 >= l2)
    {
        // Entries ranked below idx[r1] occupy the front of the range.
        int right_idx = (data1[r1] - key[p]) / 23 - 1;
        solve(right_idx, l1, r1 - r2 + idx[r1] - 1, l2, idx[r1] - 1);
    }
}
int main()
{
vector<int> V(32);
for (int i = 0; i < 32; i++)
V[i] = data3[i];
sort(V.begin(), V.end());
for (int i = 0; i < 32; i++)
idx[i] = lower_bound(V.begin(), V.end(), data3[i]) - V.begin();
for (int i = 0; i < 32; i++)
add_data[i] = i + getrand() % 107 + 97;
for (int i = 0; i < 32; i++)
rand_data[i] = getrand();
int rt = min_element(rand_data, rand_data + 32) - rand_data;
solve(rt, 0, 31, 0, 31);
for (int i = 0; i < 32; i++)
printf("%c", key[i]);
return 0;
}
#8226d8a68d25d8f03be17c4d7027b29c
总结:
1、由ida指令转到keystone去写需要注意很多地方,包括'dword_'等字符串的处理,以及在后面进行addr_map修改时区分函数调用与系统函数调用等细节;
2、在代码块的bfs过程中,优先代码块的拓展(即call_target的遍历),遍历完之后再进行跳转语句的拓展(即branch_next的遍历)
2、巅峰极客
题目是一个类迷宫题,从开始函数开始,每个函数都包含一次选择,除了第一个是直接if比较外,其余都是switch语句:
直到通过最短路径到达sub_54DE35函数
解决本题除了写交互脚本进行bfs遍历外还可以试着写idapython脚本实现bfs来解决
思路如下:从sub_54DE35开始,查看交叉引用,将调用函数的地址以及调用函数的开始地址加入队列来再次进行遍历,遍历过程中如果碰到已经遇到的地址,则停止遍历,提取过程中的switch case值(即flag路径),直到遍历到开始函数后面的那个正确的if语句后倒序输出过程中的case值
脚本如下:
from idc import *
from idautils import *
def run_one(addr, paths, flag):
    """Walk one step backwards from ``addr`` along its cross-references.

    Every xref to ``addr`` whose containing function start is not already in
    ``paths`` is kept.  For each kept xref the repeatable comment at
    ``xref.frm - 5`` is read; it must contain ``case``, and the number after
    ``case`` is converted to a character with ``chr``.

    Args:
        addr: Address whose incoming references are examined.
        paths: Function start addresses already on the current path.
        flag: Characters collected so far (not used by this function).

    Returns:
        A list of ``(char, function_start)`` tuples, or ``None`` when no xref
        leads to a function outside ``paths``.
    """
    to_handle_refs = []
    for xref in XrefsTo(addr, 0):
        ref_from = xref.frm
        fun_start = idc.get_func_attr(ref_from, FUNCATTR_START)
        if fun_start not in paths:
            to_handle_refs.append((ref_from, fun_start))
    if not to_handle_refs:
        return None

    rets = []
    for ref_from, fun_start in to_handle_refs:
        # NOTE(review): assumes the "case N" comment sits exactly 5 bytes
        # before the referencing instruction - confirm in the database.
        case_ea = ref_from - 5
        comment = idc.get_cmt(case_ea, 1)
        # get_cmt may return None; fail the same assertion in that case.
        assert comment is not None and 'case' in comment
        c = chr(int(comment.split('case')[1]))
        rets.append((c, fun_start))
    return rets
# Breadth-first search backwards from the target function.  Each queue entry
# is (address to examine, addresses already on the path, characters so far).
start = 0x54DE35
addr = start
paths = []
flag = ''
queue = [(addr, paths, flag)]
while len(queue) > 0:
    # Process one BFS level at a time.
    new_queue = []
    for addr, paths, flag in queue:
        rets = run_one(addr, paths, flag)
        if rets is None:
            continue
        for c, next_fun in rets:
            if next_fun == 0x40187c:
                # Reached the function at 0x40187c: the characters were
                # collected target-first, so print them reversed after "S".
                print('succ:S%s' % (flag + c)[::-1])
                continue
            new_queue.append((next_fun, paths + [addr], flag + c))
    queue = new_queue
#SSSSSSSSSDDDDDDWWWWAAWWAAWWDDDDDDDDDDDDDDDDDDDDSSDDSSAASSSSAAAAWWAAWWWWAASSSSSSAASSDDSSSSDDWWWWDDSSDDDDWWDDDDDDWWAAAAWWDDDDWWAAWWWWDDSSDDSSSSSSSSSSDDDDSSAAAASSSSSSAASSSSAAWWAASSSSDDDDDDDDDDSSDDSSAASSSSAASSSSSSSSDDWWWWWWDDWWWWDDWWWWDDSSSSSSSSAASSSSDDDDSSDDDDWWDDSSDDSSDDDDDDDDSSDDSSSSDDDDSSDDSSSSSSDDSSSSDDDDSSSSDDDDDDSSSSDDSSDSSASSSSAASSDDSSAASSDDDDDDSSDDDDWWDDSSSSSSDDDDWWAAWWWWDDDDSSSSDDDDDDSSAASSSSSSDDDDDDDDSSDDDDSSSSSSDDWWDDDDDDSSSSSSSSAASSDDSSSSSSAASSDDS
Dest0g3 战队由天命战队和成都信息工程大学道格安全研究实验室联合组建,热爱CTF。
研究方向涉及Web安全、逆向分析、病毒分析、渗透测试、黑产分析、漏洞挖掘、无线安全、密码学、隐写与取证技术等热门研究方向,团队成员一直致力于信息安全技术的研究与实践。
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论