不知道大家有没有忘记往期知识呀?再回顾一下吧!
p.s 注意选择自己电脑显卡对应的Toolkit工具链
默认情况下,CUDA 编译器驱动程序 nvcc 会将 cubin 文件嵌入到主机可执行文件中。但也可以通过 nvcc 的“-cubin”选项单独生成 cubin 文件。cubin 文件由 CUDA 驱动程序 API 在运行时加载
一般逆向cuda的思路就是先看主逻辑寻找可用信息,然后使用cuobjdump 来获得PTX汇编代码,理清PTX汇编的逻辑后基本就解决问题了
cuobjdump用法:
cuobjdump [options] <file>,
cuobjdump -ptx 黑客不许哭.exe > output.ptx
Fatbin elf code:
================
arch = sm_52
code version = [1,7]
host = windows
compile_size = 64bit
Fatbin ptx code:
================
arch = sm_52
code version = [8,4]
host = windows
compile_size = 64bit
compressed
ptxasOptions =
//
//
//
//
//
//
.version 8.4
.target sm_52
.address_size 64
//
// Kernel entry _Z12vectorAddGPUPKdS0_Pdi; the mangled name decodes to
// vectorAddGPU(const double*, const double*, double*, int).
// Each thread whose global index is below param_3 stores
// param_0[idx] + param_1[idx] into param_2[idx] (64-bit floats).
.visible .entry _Z12vectorAddGPUPKdS0_Pdi(
.param .u64 _Z12vectorAddGPUPKdS0_Pdi_param_0,
.param .u64 _Z12vectorAddGPUPKdS0_Pdi_param_1,
.param .u64 _Z12vectorAddGPUPKdS0_Pdi_param_2,
.param .u32 _Z12vectorAddGPUPKdS0_Pdi_param_3
)
{
.reg .pred %p<2>;
.reg .b32 %r<6>;
.reg .f64 %fd<4>;
.reg .b64 %rd<11>;
// Load the three pointer arguments and the element count.
ld.param.u64 %rd1, [_Z12vectorAddGPUPKdS0_Pdi_param_0];
ld.param.u64 %rd2, [_Z12vectorAddGPUPKdS0_Pdi_param_1];
ld.param.u64 %rd3, [_Z12vectorAddGPUPKdS0_Pdi_param_2];
ld.param.u32 %r2, [_Z12vectorAddGPUPKdS0_Pdi_param_3];
// %r1 = ctaid.x * ntid.x + tid.x (global thread index).
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %ntid.x;
mov.u32 %r5, %tid.x;
mad.lo.s32 %r1, %r3, %r4, %r5;
// Bounds check: threads with index >= count skip straight to ret.
setp.ge.s32 %p1, %r1, %r2;
@%p1 bra $L__BB0_2;
// %rd5 = index * 8, the byte offset of one f64 element.
cvta.to.global.u64 %rd4, %rd1;
mul.wide.s32 %rd5, %r1, 8;
add.s64 %rd6, %rd4, %rd5;
cvta.to.global.u64 %rd7, %rd2;
add.s64 %rd8, %rd7, %rd5;
// %fd1 = param_1[idx], %fd2 = param_0[idx]; %fd3 is their sum.
ld.global.f64 %fd1, [%rd8];
ld.global.f64 %fd2, [%rd6];
add.f64 %fd3, %fd2, %fd1;
// Store the sum to param_2[idx].
cvta.to.global.u64 %rd9, %rd3;
add.s64 %rd10, %rd9, %rd5;
st.global.f64 [%rd10], %fd3;
$L__BB0_2:
ret;
}
//
// Kernel entry _Z23vectorMulElementwiseGPUPKdS0_Pdi; the mangled name decodes to
// vectorMulElementwiseGPU(const double*, const double*, double*, int).
// Each thread whose global index is below param_3 stores
// param_0[idx] * param_1[idx] + 100.0 into param_2[idx] (64-bit floats).
.visible .entry _Z23vectorMulElementwiseGPUPKdS0_Pdi(
.param .u64 _Z23vectorMulElementwiseGPUPKdS0_Pdi_param_0,
.param .u64 _Z23vectorMulElementwiseGPUPKdS0_Pdi_param_1,
.param .u64 _Z23vectorMulElementwiseGPUPKdS0_Pdi_param_2,
.param .u32 _Z23vectorMulElementwiseGPUPKdS0_Pdi_param_3
)
{
.reg .pred %p<2>;
.reg .b32 %r<6>;
.reg .f64 %fd<4>;
.reg .b64 %rd<11>;
// Load the three pointer arguments and the element count.
ld.param.u64 %rd1, [_Z23vectorMulElementwiseGPUPKdS0_Pdi_param_0];
ld.param.u64 %rd2, [_Z23vectorMulElementwiseGPUPKdS0_Pdi_param_1];
ld.param.u64 %rd3, [_Z23vectorMulElementwiseGPUPKdS0_Pdi_param_2];
ld.param.u32 %r2, [_Z23vectorMulElementwiseGPUPKdS0_Pdi_param_3];
// %r1 = ctaid.x * ntid.x + tid.x (global thread index).
mov.u32 %r3, %ctaid.x;
mov.u32 %r4, %ntid.x;
mov.u32 %r5, %tid.x;
mad.lo.s32 %r1, %r3, %r4, %r5;
// Bounds check: threads with index >= count skip straight to ret.
setp.ge.s32 %p1, %r1, %r2;
@%p1 bra $L__BB1_2;
// %rd5 = index * 8, the byte offset of one f64 element.
cvta.to.global.u64 %rd4, %rd1;
mul.wide.s32 %rd5, %r1, 8;
add.s64 %rd6, %rd4, %rd5;
cvta.to.global.u64 %rd7, %rd2;
add.s64 %rd8, %rd7, %rd5;
ld.global.f64 %fd1, [%rd8];
ld.global.f64 %fd2, [%rd6];
// Fused multiply-add: %fd3 = %fd2 * %fd1 + 0d4059000000000000,
// where the immediate is the IEEE-754 double bit pattern of 100.0.
fma.rn.f64 %fd3, %fd2, %fd1, 0d4059000000000000;
// Store the result to param_2[idx].
cvta.to.global.u64 %rd9, %rd3;
add.s64 %rd10, %rd9, %rd5;
st.global.f64 [%rd10], %fd3;
$L__BB1_2:
ret;
}
for(int i=0;i<len;i++)
{
c[i]=a[i]+b[i];
c[i]=c[i]*d[i]+100.00;
}
打断点调试发现,经过vectorAddGPU后之前的44位数组每个值增加了1,说明是和全1数组进行的加法,即b数组的值全是1
现在我们知道了整体的加密逻辑:
1.乘1.020123456789;
2.加全1数组
3.乘key数组;
4.加100
解密逻辑就是:
1.密文减100;
2.密文除以密钥数组
3.密文减1数组
4.密文除以初始化浮点数
解密脚本如下:
/*
 * Recovers the flag of the first challenge by inverting the encryption
 * pipeline step by step.
 *
 * Encryption applied to every plaintext character p:
 *     enc = (p * key1 + 1) * key_float[i] + 100
 * so the inverse is:
 *     p = ((enc - 100) / key_float[i] - 1) / key1
 *
 * The result is rounded to the nearest integer to absorb floating-point
 * error and printed as a character. Returns 0.
 */
int main(void)
{
    /* Ciphertext values, one per flag character. */
    static const double encflag[] =
    {
        4358.58716, 6122.2983, 2158.74574, 5973.017537, 9173.840881, 6164.67827,
        12293.528276, 4091.327439, 3360.696562, 2403.667017, 3199.455077,
        4962.117508,
        8266.407604, 2863.062918, 1044.626306, 1067.5308730000002, 3217.476319,
        6260.942959, 3278.952568, 160.724197, 596.797742, 3277.973032,
        6368.757598,
        842.858109, 5925.142209, 3046.937162, 12752.384458, 2442.54747,
        1827.164764,
        4903.961921, 5619.869598, 3851.247916, 4472.987644, 13135.636855,
        1640.630636,
        975.429551, 2174.379531, 2289.845471, 2605.707441, 1488.586824,
        12216.019619,
        4588.270425, 4803.36317, 13035.30263
    };
    /* Per-character multiplier used by the element-wise multiply kernel. */
    static const double key_float[] =
    {
        60.51846366284686, 89.4737043286176, 24.031047113523933,
        84.68873702464015,
        104.66953644646323, 83.75627693648984, 96.41044018110416,
        75.27071882034213,
        60.33140727998576, 46.10475987767577, 56.28563000222285,
        86.68936481373537,
        80.87786332435297, 55.29894355978243, 9.261748448423328,
        20.6272127322797,
        31.189741971747896, 116.18656005122571, 30.859918262868042,
        1.0633446004217317,
        10.591447767777225, 55.64965261721374, 122.95044769452201,
        7.140637105592679,
        55.44977106531295, 62.827038867512506, 125.30574894504994,
        45.94487116254584,
        32.57185367060958, 92.37291765689986, 117.68050783530462,
        63.422414786033976,
        84.08593452538155, 125.30354189600813, 26.504600725852114,
        15.6085145259943,
        35.687075116213585, 37.67352051379848, 24.32434117146088,
        25.692484908155073,
        116.46382825728031, 86.30264794289376, 79.51984419851664,
        100.65174601005425,
    };
    /* Scalar every character was multiplied by in the first step. */
    const double key1 = 1.020123456789;
    const size_t count = sizeof(encflag) / sizeof(encflag[0]);

    for (size_t i = 0; i < count; i++)
    {
        /* Undo "+100", "*key_float[i]", "+1" and "*key1" in reverse order. */
        double value = ((encflag[i] - 100) / key_float[i] - 1) / key1;
        char c = (char)round(value);
        printf("%c", c);
    }
    return 0;
}
DASCTF{34056b0c-a3d7-71ef-b132-92e8688d4e29}
Fatbin elf code:
================
arch = sm_52
code version = [1,7]
host = linux
compile_size = 64bit
Fatbin elf code:
================
arch = sm_52
code version = [1,7]
host = linux
compile_size = 64bit
Fatbin ptx code:
================
arch = sm_52
code version = [8,0]
host = linux
compile_size = 64bit
compressed
.version 8.0
.target sm_52
.address_size 64
.extern .func (.param .b32 func_retval0) vprintf
(
.param .b64 vprintf_param_0,
.param .b64 vprintf_param_1
)
;
.const .align 1 .b8 T[256] = {99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103,
43, 254, 215, 171, 118, 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175,
156, 164, 114, 192, 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113,
216, 49, 21, 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178,
117, 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, 83,
209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, 208, 239, 170,
251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, 81, 163, 64, 143, 146,
157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, 205, 12, 19, 236, 95, 151,
68, 23, 196, 167, 126, 61, 100, 93, 25, 115, 96, 129, 79, 220, 34, 42, 144, 136,
70, 238, 184, 20, 222, 94, 11, 219, 224, 50, 58, 10, 73, 6, 36, 92, 194, 211,
172, 98, 145, 149, 228, 121, 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244,
234, 101, 122, 174, 8, 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31,
75, 189, 139, 138, 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193,
29, 158, 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40,
223, 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22};
.const .align 1 .b8 RT[256] = {82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163,
158, 129, 243, 215, 251, 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68,
196, 222, 233, 203, 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66,
250, 195, 78, 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209,
37, 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, 144,
216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, 208, 44, 30,
143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, 58, 145, 17, 65, 79, 103,
220, 234, 151, 242, 207, 206, 240, 180, 230, 115, 150, 172, 116, 34, 231, 173,
53, 133, 226, 249, 55, 232, 28, 117, 223, 110, 71, 241, 26, 113, 29, 41, 197,
137, 111, 183, 98, 14, 170, 24, 190, 27, 252, 86, 62, 75, 198, 210, 121, 32, 154,
219, 192, 254, 120, 205, 90, 244, 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16,
89, 39, 128, 236, 95, 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147,
201, 156, 239, 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83,
153, 97, 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125};
.global .align 1 .b8 $str[8] = {103, 105, 102, 116, 49, 58, 10, 0};
.global .align 1 .b8 $str$1[6] = {37, 48, 50, 120, 32, 0};
.global .align 1 .b8 $str$2[2] = {10, 0};
.global .align 1 .b8 $str$3[8] = {103, 105, 102, 116, 50, 58, 10, 0};
.global .align 1 .b8 $str$4[8] = {103, 105, 102, 116, 51, 58, 10, 0};
.global .align 1 .b8 $str$5[8] = {103, 105, 102, 116, 52, 58, 10, 0};
.global .align 1 .b8 $str$6[8] = {103, 105, 102, 116, 53, 58, 10, 0};
.visible .entry _Z14encrypt_kernelPhh(
.param .u64 _Z14encrypt_kernelPhh_param_0,
.param .u8 _Z14encrypt_kernelPhh_param_1
)
{
.local .align 8 .b8 __local_depot0[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<41>;
.reg .b16 %rs<62>;
.reg .b32 %r<265>;
.reg .b64 %rd<103>;
mov.u64 %SPL, __local_depot0;
cvta.local.u64 %SP, %SPL;
ld.param.u8 %rs12, [_Z14encrypt_kernelPhh_param_1];
ld.param.u64 %rd19, [_Z14encrypt_kernelPhh_param_0];
cvta.to.global.u64 %rd1, %rd19;
add.u64 %rd20, %SP, 0;
add.u64 %rd2, %SPL, 0;
mov.u32 %r1, %ntid.x;
mov.u32 %r54, %ctaid.x;
mul.lo.s32 %r2, %r54, %r1;
mov.u32 %r3, %tid.x;
add.s32 %r4, %r2, %r3;
setp.ge.u32 %p1, %r3, %r1;
cvt.s64.s32 %rd21, %r4;
add.s64 %rd3, %rd1, %rd21;
@%p1 bra $L__BB0_12;
ld.global.u8 %rs13, [%rd3];
cvt.u16.u32 %rs14, %r4;
mul.lo.s16 %rs15, %rs14, 73;
add.s16 %rs16, %rs15, %rs12;
xor.b16 %rs17, %rs13, %rs16;
and.b16 %rs18, %rs17, 240;
shr.u16 %rs19, %rs18, 4;
shl.b16 %rs20, %rs17, 4;
or.b16 %rs58, %rs19, %rs20;
mov.u32 %r242, 0;
mov.u64 %rd24, T;
$L__BB0_2:
cvt.u64.u16 %rd22, %rs58;
and.b64 %rd23, %rd22, 255;
add.s64 %rd25, %rd24, %rd23;
ld.const.u8 %rs21, [%rd25];
shr.u16 %rs22, %rs21, 4;
shl.b16 %rs23, %rs21, 4;
or.b16 %rs24, %rs22, %rs23;
cvt.u16.u32 %rs25, %r242;
xor.b16 %rs58, %rs24, %rs25;
add.s32 %r242, %r242, 1;
setp.lt.u32 %p2, %r242, 10485760;
@%p2 bra $L__BB0_2;
mov.u32 %r243, 0;
$L__BB0_4:
cvt.u64.u16 %rd26, %rs58;
and.b64 %rd27, %rd26, 255;
add.s64 %rd29, %rd24, %rd27;
ld.const.u8 %rs26, [%rd29];
shr.u16 %rs27, %rs26, 4;
shl.b16 %rs28, %rs26, 4;
or.b16 %rs29, %rs27, %rs28;
cvt.u16.u32 %rs30, %r243;
xor.b16 %rs58, %rs29, %rs30;
add.s32 %r243, %r243, 1;
setp.lt.u32 %p3, %r243, 10485760;
@%p3 bra $L__BB0_4;
mov.u32 %r244, 0;
$L__BB0_6:
cvt.u64.u16 %rd30, %rs58;
and.b64 %rd31, %rd30, 255;
add.s64 %rd33, %rd24, %rd31;
ld.const.u8 %rs31, [%rd33];
shr.u16 %rs32, %rs31, 4;
shl.b16 %rs33, %rs31, 4;
or.b16 %rs34, %rs32, %rs33;
cvt.u16.u32 %rs35, %r244;
xor.b16 %rs58, %rs34, %rs35;
add.s32 %r244, %r244, 1;
setp.lt.u32 %p4, %r244, 10485760;
@%p4 bra $L__BB0_6;
mov.u32 %r245, 0;
$L__BB0_8:
cvt.u64.u16 %rd34, %rs58;
and.b64 %rd35, %rd34, 255;
add.s64 %rd37, %rd24, %rd35;
ld.const.u8 %rs36, [%rd37];
shr.u16 %rs37, %rs36, 4;
shl.b16 %rs38, %rs36, 4;
or.b16 %rs39, %rs37, %rs38;
cvt.u16.u32 %rs40, %r245;
xor.b16 %rs58, %rs39, %rs40;
add.s32 %r245, %r245, 1;
setp.lt.u32 %p5, %r245, 10485760;
@%p5 bra $L__BB0_8;
mov.u32 %r246, 0;
$L__BB0_10:
cvt.u64.u16 %rd38, %rs58;
and.b64 %rd39, %rd38, 255;
add.s64 %rd41, %rd24, %rd39;
ld.const.u8 %rs41, [%rd41];
shr.u16 %rs42, %rs41, 4;
shl.b16 %rs43, %rs41, 4;
or.b16 %rs44, %rs42, %rs43;
cvt.u16.u32 %rs45, %r246;
xor.b16 %rs58, %rs44, %rs45;
add.s32 %r246, %r246, 1;
setp.lt.u32 %p6, %r246, 10485760;
@%p6 bra $L__BB0_10;
st.global.u8 [%rd3], %rs58;
$L__BB0_12:
bar.sync 0;
setp.ne.s32 %p7, %r4, 0;
@%p7 bra $L__BB0_17;
mov.u64 %rd42, $str;
cvta.global.u64 %rd43, %rd42;
mov.u64 %rd44, 0;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd43;
.param .b64 param1;
st.param.b64 [param1+0], %rd44;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r60, [retval0+0];
}
setp.eq.s32 %p8, %r1, 0;
@%p8 bra $L__BB0_16;
mov.u32 %r247, 0;
mov.u64 %rd45, $str$1;
cvta.global.u64 %rd46, %rd45;
mov.u64 %rd97, %rd1;
$L__BB0_15:
ld.global.u8 %r62, [%rd97];
st.local.u32 [%rd2], %r62;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd46;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r63, [retval0+0];
}
add.s64 %rd97, %rd97, 1;
add.s32 %r247, %r247, 1;
setp.lt.u32 %p9, %r247, %r1;
@%p9 bra $L__BB0_15;
$L__BB0_16:
mov.u64 %rd48, $str$2;
cvta.global.u64 %rd49, %rd48;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd49;
.param .b64 param1;
st.param.b64 [param1+0], %rd44;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r64, [retval0+0];
}
$L__BB0_17:
bar.sync 0;
setp.eq.s32 %p10, %r1, 0;
setp.ne.s32 %p11, %r3, 0;
or.pred %p12, %p11, %p10;
@%p12 bra $L__BB0_20;
cvt.s64.s32 %rd51, %r2;
add.s64 %rd98, %rd1, %rd51;
mov.u32 %r248, 0;
$L__BB0_19:
add.s32 %r248, %r248, 1;
rem.u32 %r66, %r248, %r1;
add.s32 %r67, %r66, %r2;
cvt.s64.s32 %rd52, %r67;
add.s64 %rd53, %rd1, %rd52;
ld.global.u8 %rs46, [%rd98];
xor.b16 %rs47, %rs46, %rs12;
ld.global.u8 %rs48, [%rd53];
xor.b16 %rs49, %rs47, %rs48;
st.global.u8 [%rd98], %rs49;
add.s64 %rd98, %rd98, 1;
setp.lt.u32 %p13, %r248, %r1;
@%p13 bra $L__BB0_19;
$L__BB0_20:
bar.sync 0;
@%p7 bra $L__BB0_25;
mov.u64 %rd54, $str$3;
cvta.global.u64 %rd55, %rd54;
mov.u64 %rd56, 0;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd55;
.param .b64 param1;
st.param.b64 [param1+0], %rd56;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r68, [retval0+0];
}
@%p10 bra $L__BB0_24;
mov.u32 %r249, 0;
mov.u64 %rd57, $str$1;
cvta.global.u64 %rd58, %rd57;
mov.u64 %rd99, %rd1;
$L__BB0_23:
ld.global.u8 %r70, [%rd99];
st.local.u32 [%rd2], %r70;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd58;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r71, [retval0+0];
}
add.s64 %rd99, %rd99, 1;
add.s32 %r249, %r249, 1;
setp.lt.u32 %p16, %r249, %r1;
@%p16 bra $L__BB0_23;
$L__BB0_24:
mov.u64 %rd60, $str$2;
cvta.global.u64 %rd61, %rd60;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd61;
.param .b64 param1;
st.param.b64 [param1+0], %rd56;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r72, [retval0+0];
}
$L__BB0_25:
bar.sync 0;
and.b32 %r73, %r3, 1;
setp.eq.b32 %p18, %r73, 1;
add.s32 %r74, %r3, 1;
rem.u32 %r75, %r74, %r1;
add.s32 %r76, %r75, %r2;
cvt.s64.s32 %rd63, %r76;
add.s64 %rd11, %rd1, %rd63;
or.pred %p19, %p1, %p18;
@%p19 bra $L__BB0_27;
ld.global.u8 %rs50, [%rd3];
ld.global.u8 %rs51, [%rd11];
st.global.u8 [%rd3], %rs51;
st.global.u8 [%rd11], %rs50;
$L__BB0_27:
bar.sync 0;
@%p7 bra $L__BB0_32;
mov.u64 %rd64, $str$4;
cvta.global.u64 %rd65, %rd64;
mov.u64 %rd66, 0;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd65;
.param .b64 param1;
st.param.b64 [param1+0], %rd66;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r77, [retval0+0];
}
@%p10 bra $L__BB0_31;
mov.u32 %r250, 0;
mov.u64 %rd67, $str$1;
cvta.global.u64 %rd68, %rd67;
mov.u64 %rd100, %rd1;
$L__BB0_30:
ld.global.u8 %r79, [%rd100];
st.local.u32 [%rd2], %r79;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd68;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r80, [retval0+0];
}
add.s64 %rd100, %rd100, 1;
add.s32 %r250, %r250, 1;
setp.lt.u32 %p22, %r250, %r1;
@%p22 bra $L__BB0_30;
$L__BB0_31:
mov.u64 %rd70, $str$2;
cvta.global.u64 %rd71, %rd70;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd71;
.param .b64 param1;
st.param.b64 [param1+0], %rd66;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r81, [retval0+0];
}
$L__BB0_32:
bar.sync 0;
setp.lt.s32 %p24, %r3, 1;
or.pred %p25, %p24, %p1;
shr.u32 %r82, %r3, 31;
add.s32 %r83, %r3, %r82;
and.b32 %r84, %r83, -2;
sub.s32 %r85, %r3, %r84;
setp.ne.s32 %p26, %r85, 1;
or.pred %p27, %p25, %p26;
@%p27 bra $L__BB0_34;
ld.global.u8 %rs52, [%rd3];
ld.global.u8 %rs53, [%rd11];
st.global.u8 [%rd3], %rs53;
st.global.u8 [%rd11], %rs52;
$L__BB0_34:
bar.sync 0;
@%p7 bra $L__BB0_39;
mov.u64 %rd73, $str$5;
cvta.global.u64 %rd74, %rd73;
mov.u64 %rd75, 0;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd74;
.param .b64 param1;
st.param.b64 [param1+0], %rd75;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r86, [retval0+0];
}
@%p10 bra $L__BB0_38;
mov.u32 %r251, 0;
mov.u64 %rd76, $str$1;
cvta.global.u64 %rd77, %rd76;
mov.u64 %rd101, %rd1;
$L__BB0_37:
ld.global.u8 %r88, [%rd101];
st.local.u32 [%rd2], %r88;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd77;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r89, [retval0+0];
}
add.s64 %rd101, %rd101, 1;
add.s32 %r251, %r251, 1;
setp.lt.u32 %p30, %r251, %r1;
@%p30 bra $L__BB0_37;
$L__BB0_38:
mov.u64 %rd79, $str$2;
cvta.global.u64 %rd80, %rd79;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd80;
.param .b64 param1;
st.param.b64 [param1+0], %rd75;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r90, [retval0+0];
}
$L__BB0_39:
bar.sync 0;
and.b32 %r91, %r3, 7;
setp.ne.s32 %p32, %r91, 0;
or.pred %p33, %p1, %p32;
@%p33 bra $L__BB0_43;
ld.global.u32 %r259, [%rd3+4];
ld.global.u32 %r260, [%rd3];
mov.u32 %r258, 0;
mov.u32 %r257, -239350328;
mov.u32 %r256, 387276957;
mov.u32 %r255, 2027808484;
mov.u32 %r254, -626627285;
mov.u32 %r253, 1013904242;
mov.u32 %r252, -1640531527;
$L__BB0_41:
shl.b32 %r99, %r259, 4;
add.s32 %r100, %r99, -1556008596;
shr.u32 %r101, %r259, 5;
add.s32 %r102, %r101, -939442524;
xor.b32 %r103, %r102, %r100;
add.s32 %r104, %r252, %r259;
xor.b32 %r105, %r103, %r104;
add.s32 %r106, %r105, %r260;
shl.b32 %r107, %r106, 4;
add.s32 %r108, %r107, 1013904242;
add.s32 %r109, %r106, %r252;
xor.b32 %r110, %r108, %r109;
shr.u32 %r111, %r106, 5;
add.s32 %r112, %r111, 338241895;
xor.b32 %r113, %r110, %r112;
add.s32 %r114, %r113, %r259;
shl.b32 %r115, %r114, 4;
add.s32 %r116, %r115, -1556008596;
add.s32 %r117, %r253, %r114;
shr.u32 %r118, %r114, 5;
add.s32 %r119, %r118, -939442524;
xor.b32 %r120, %r119, %r116;
xor.b32 %r121, %r120, %r117;
add.s32 %r122, %r121, %r106;
shl.b32 %r123, %r122, 4;
add.s32 %r124, %r123, 1013904242;
add.s32 %r125, %r122, %r253;
xor.b32 %r126, %r124, %r125;
shr.u32 %r127, %r122, 5;
add.s32 %r128, %r127, 338241895;
xor.b32 %r129, %r126, %r128;
add.s32 %r130, %r129, %r114;
shl.b32 %r131, %r130, 4;
add.s32 %r132, %r131, -1556008596;
add.s32 %r133, %r254, %r130;
shr.u32 %r134, %r130, 5;
add.s32 %r135, %r134, -939442524;
xor.b32 %r136, %r135, %r132;
xor.b32 %r137, %r136, %r133;
add.s32 %r138, %r137, %r122;
shl.b32 %r139, %r138, 4;
add.s32 %r140, %r139, 1013904242;
add.s32 %r141, %r138, %r254;
xor.b32 %r142, %r140, %r141;
shr.u32 %r143, %r138, 5;
add.s32 %r144, %r143, 338241895;
xor.b32 %r145, %r142, %r144;
add.s32 %r146, %r145, %r130;
shl.b32 %r147, %r146, 4;
add.s32 %r148, %r147, -1556008596;
add.s32 %r149, %r255, %r146;
shr.u32 %r150, %r146, 5;
add.s32 %r151, %r150, -939442524;
xor.b32 %r152, %r151, %r148;
xor.b32 %r153, %r152, %r149;
add.s32 %r154, %r153, %r138;
shl.b32 %r155, %r154, 4;
add.s32 %r156, %r155, 1013904242;
add.s32 %r157, %r154, %r255;
xor.b32 %r158, %r156, %r157;
shr.u32 %r159, %r154, 5;
add.s32 %r160, %r159, 338241895;
xor.b32 %r161, %r158, %r160;
add.s32 %r162, %r161, %r146;
shl.b32 %r163, %r162, 4;
add.s32 %r164, %r163, -1556008596;
add.s32 %r165, %r256, %r162;
shr.u32 %r166, %r162, 5;
add.s32 %r167, %r166, -939442524;
xor.b32 %r168, %r167, %r164;
xor.b32 %r169, %r168, %r165;
add.s32 %r170, %r169, %r154;
shl.b32 %r171, %r170, 4;
add.s32 %r172, %r171, 1013904242;
add.s32 %r173, %r170, %r256;
xor.b32 %r174, %r172, %r173;
shr.u32 %r175, %r170, 5;
add.s32 %r176, %r175, 338241895;
xor.b32 %r177, %r174, %r176;
add.s32 %r178, %r177, %r162;
shl.b32 %r179, %r178, 4;
add.s32 %r180, %r179, -1556008596;
add.s32 %r181, %r257, -1013904242;
add.s32 %r182, %r181, %r178;
shr.u32 %r183, %r178, 5;
add.s32 %r184, %r183, -939442524;
xor.b32 %r185, %r184, %r180;
xor.b32 %r186, %r185, %r182;
add.s32 %r187, %r186, %r170;
shl.b32 %r188, %r187, 4;
add.s32 %r189, %r188, 1013904242;
add.s32 %r190, %r187, %r181;
xor.b32 %r191, %r189, %r190;
shr.u32 %r192, %r187, 5;
add.s32 %r193, %r192, 338241895;
xor.b32 %r194, %r191, %r193;
add.s32 %r195, %r194, %r178;
shl.b32 %r196, %r195, 4;
add.s32 %r197, %r196, -1556008596;
add.s32 %r198, %r257, 1640531527;
add.s32 %r199, %r198, %r195;
shr.u32 %r200, %r195, 5;
add.s32 %r201, %r200, -939442524;
xor.b32 %r202, %r201, %r197;
xor.b32 %r203, %r202, %r199;
add.s32 %r204, %r203, %r187;
shl.b32 %r205, %r204, 4;
add.s32 %r206, %r205, 1013904242;
add.s32 %r207, %r204, %r198;
xor.b32 %r208, %r206, %r207;
shr.u32 %r209, %r204, 5;
add.s32 %r210, %r209, 338241895;
xor.b32 %r211, %r208, %r210;
add.s32 %r212, %r211, %r195;
shl.b32 %r213, %r212, 4;
add.s32 %r214, %r213, -1556008596;
add.s32 %r215, %r257, %r212;
shr.u32 %r216, %r212, 5;
add.s32 %r217, %r216, -939442524;
xor.b32 %r218, %r217, %r214;
xor.b32 %r219, %r218, %r215;
add.s32 %r260, %r219, %r204;
shl.b32 %r220, %r260, 4;
add.s32 %r221, %r220, 1013904242;
add.s32 %r222, %r260, %r257;
xor.b32 %r223, %r221, %r222;
shr.u32 %r224, %r260, 5;
add.s32 %r225, %r224, 338241895;
xor.b32 %r226, %r223, %r225;
add.s32 %r259, %r226, %r212;
add.s32 %r257,
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd83;
.param .b64 param1;
st.param.b64 [param1+0], %rd84;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r227, [retval0+0];
}
@%p10 bra $L__BB0_51;
add.s32 %r229, %r1, -1;
and.b32 %r264, %r1, 3;
setp.lt.u32 %p37, %r229, 3;
mov.u32 %r263, 0;
@%p37 bra $L__BB0_48;
sub.s32 %r262, %r1, %r264;
mov.u64 %rd87, $str$1;
cvta.global.u64 %rd88, %rd87;
$L__BB0_47:
cvt.s64.s32 %rd85, %r263;
add.s64 %rd86, %rd1, %rd85;
ld.global.u8 %r231, [%rd86];
st.local.u32 [%rd2], %r231;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd88;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r232, [retval0+0];
}
ld.global.u8 %r233, [%rd86+1];
st.local.u32 [%rd2], %r233;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd88;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r234, [retval0+0];
}
ld.global.u8 %r235, [%rd86+2];
st.local.u32 [%rd2], %r235;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd88;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r236, [retval0+0];
}
ld.global.u8 %r237, [%rd86+3];
st.local.u32 [%rd2], %r237;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd88;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r238, [retval0+0];
}
add.s32 %r263, %r263, 4;
add.s32 %r262, %r262, -4;
setp.ne.s32 %p38, %r262, 0;
@%p38 bra $L__BB0_47;
$L__BB0_48:
setp.eq.s32 %p39, %r264, 0;
@%p39 bra $L__BB0_51;
cvt.s64.s32 %rd90, %r263;
add.s64 %rd102, %rd1, %rd90;
mov.u64 %rd91, $str$1;
cvta.global.u64 %rd92, %rd91;
可以发现分了五段,每段加密后会出现gift X,给出这段加密后的输出,便于调试
$L__BB0_50:
.pragma "nounroll";
ld.global.u8 %r239, [%rd102];
st.local.u32 [%rd2], %r239;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd92;
.param .b64 param1;
st.param.b64 [param1+0], %rd20;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r240, [retval0+0];
}
add.s64 %rd102, %rd102, 1;
add.s32 %r264, %r264, -1;
setp.ne.s32 %p40, %r264, 0;
@%p40 bra $L__BB0_50;
$L__BB0_51:
mov.u64 %rd94, $str$2;
cvta.global.u64 %rd95, %rd94;
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd95;
.param .b64 param1;
st.param.b64 [param1+0], %rd84;
.param .b32 retval0;
call.uni (retval0),
vprintf,
(
param0,
param1
);
ld.param.b32 %r241, [retval0+0];
}
$L__BB0_52:
bar.sync 0;
cvt.u16.u32 %rs54, %r4;
ld.global.u8 %rs55, [%rd3];
xor.b16 %rs56, %rs55, %rs54;
st.global.u8 [%rd3], %rs56;
ret;
}
b[]={}
for(int i=0;i<len;i++)
a[i]=a[i]^(i*73+0xAC)
a[i]=((a[i]&0xf)<<4)|((a[i]&0xf0)>>4)&0xff
for(int t=0;t<len;t++)
for(int j=0;j<5;j++)
for (int i = 0; i < 10485760; i++)
{
a[t]= (T[a[t]] << 4) | (T[a[t]] >> 4);
a[t] ^= (i & 0xff);
}
gift1:56 ef 80 ec 00 5e 77 ed 74 76 e1 25 b6 77 8f 9f 9d 14 83 62 90 c3 43 2e 55
3c cd c8 fd 70 17 cc 04 f4 3a d7 30 ad f7 9f a0 b8 8d 52 11 fb fa f2 f6 87 82 a5
60 86 9e 18 b1 06 de ac a6 57 69 3e bd bc 5a 1e 4a f1 93 a3 5d f3 6c 2c ce db 32
d2 cb 9c 6f b5 f5 0c 72 89 ef 4f 77 ed 68 38 8e ae 80 84 ab d5 2b a2 9b c5 e6 56
1f b6 85 74 58 95 99 e3 cf 53 98 8b a1 44 24 1c 8a 7c 76 46 c1 00 b0 c9 79 dd a8
51 34 b2 37 d3 be 66 59 12 5e f8 e8 42 ee 3b 75 3d 6e 0d 21 7d d9 3f 8c 29 67 b9
73 10 da 08 6a bb 39 88 ea 5b a4 1b bf 0f 2d 4e dc 35 13 28 e2 94 8f 71 1a a7 96
c6 e0 c4 2f d4 81 ec 33 78 63 23 22 c7 41 09 7b 48 c0 b3 ff 07 45 7a 91 4b 65 31
d6 a9 19 16 fe 0a df 26 e5 d1 27 f9 b7 e4 e1 25 05 02 5f 64 03 d8 4d eb 01 ba f0
7f 0e 50 5c 47 7e ca e7 9a 4c 2a 92 fc b4 c2
for(int i=0;i<len;i++)
b[i]=a[i]^0xAC^a[i+1]
b[len]=a[len]^0xAc^b[0]
gift2:15 c3 c0 40 f2 85 36 35 ae 3b 68 3f 6d 54 bc ae 25 3b 4d 5e ff 2c c1 d7 c5
5d a9 99 21 cb 77 64 5c 62 41 4b 31 f6 c4 93 b4 99 73 ef 46 ad a4 a8 dd a9 8b 69
4a b4 2a 05 1b 74 de a6 5d 92 fb 2f ad 4a e8 f8 17 ce 9c 52 02 33 ec 4e b9 45 4c
b5 fb 5f 76 ec 55 d2 57 ca 0c 94 36 29 fc 1a 8c 82 a8 83 d2 52 25 95 f2 8f 1c e5
05 9f 5d 80 61 a0 d6 80 30 67 bf 86 49 cc 94 3a 5a a6 9c 2b 6d 1c d5 1c 08 d9 55
c9 2a 29 48 c1 74 93 e7 e0 0a bc 06 00 79 e2 e4 ff cf 80 f0 08 4a 1f 09 e2 72 66
cf 66 7e ce 7d 2e 1d ce 1d 53 13 08 1c 8e cf 3e 45 8a 97 66 da b7 52 c7 11 9d fc
8a 88 47 57 f9 c1 73 e7 b7 ec ad 49 2a e4 de 9f 24 df e0 54 ee 93 47 76 82 f8 4b
d3 1c a3 44 58 79 55 6f 98 5a 72 e2 ff a9 68 8c ab f1 97 cb 77 39 0a 46 17 e6 23
dd f2 a0 b7 95 18 81 d1 7a ca 14 c2 e4 da 7b
for(int i=0;i<len;i+=2)
{
temp=b[i];
b[i]=b[i+1];
b[i+1]=temp;
}
gift3:c3 15 40 c0 85 f2 35 36 3b ae 3f 68 54 6d ae bc 3b 25 5e 4d 2c ff d7 c1 5d
c5 99 a9 cb 21 64 77 62 5c 4b 41 f6 31 93 c4 99 b4 ef 73 ad 46 a8 a4 a9 dd 69 8b
b4 4a 05 2a 74 1b a6 de 92 5d 2f fb 4a ad f8 e8 ce 17 52 9c 33 02 4e ec 45 b9 b5
4c 5f fb ec 76 d2 55 ca 57 94 0c 29 36 1a fc 82 8c 83 a8 52 d2 95 25 8f f2 e5 1c
9f 05 80 5d a0 61 80 d6 67 30 86 bf cc 49 3a 94 a6 5a 2b 9c 1c 6d 1c d5 d9 08 c9
55 29 2a c1 48 93 74 e0 e7 bc 0a 00 06 e2 79 ff e4 80 cf 08 f0 1f 4a e2 09 66 72
66 cf ce 7e 2e 7d ce 1d 53 1d 08 13 8e 1c 3e cf 8a 45 66 97 b7 da c7 52 9d 11 8a
fc 47 88 f9 57 73 c1 b7 e7 ad ec 2a 49 de e4 24 9f e0 df ee 54 47 93 82 76 4b f8
1c d3 44 a3 79 58 6f 55 5a 98 e2 72 a9 ff 8c 68 f1 ab cb 97 39 77 46 0a e6 17 dd
23 a0 f2 95 b7 81 18 7a d1 14 ca e4 c2 7b da
for(int i=1;i<len;i+=2)
{
temp=b[i];
b[i]=b[i+1];
b[i+1]=temp;
}
temp=b[0];
b[0]=b[len];
b[len]=temp;
gift4:da 40 15 85 c0 35 f2 3b 36 3f ae 54 68 ae 6d 3b bc 5e 25 2c 4d d7 ff 5d c1
99 c5 cb a9 64 21 62 77 4b 5c f6 41 93 31 99 c4 ef b4 ad 73 a8 46 a9 a4 69 dd b4
8b 05 4a 74 2a a6 1b 92 de 2f 5d 4a fb f8 ad ce e8 52 17 33 9c 4e 02 45 ec b5 b9
5f 4c ec fb d2 76 ca 55 94 57 29 0c 1a 36 82 fc 83 8c 52 a8 95 d2 8f 25 e5 f2 9f
1c 80 05 a0 5d 80 61 67 d6 86 30 cc bf 3a 49 a6 94 2b 5a 1c 9c 1c 6d d9 d5 c9 08
29 55 c1 2a 93 48 e0 74 bc e7 00 0a e2 06 ff 79 80 e4 08 cf 1f f0 e2 4a 66 09 66
72 ce cf 2e 7e ce 7d 53 1d 08 1d 8e 13 3e 1c 8a cf 66 45 b7 97 c7 da 9d 52 8a 11
47 fc f9 88 73 57 b7 c1 ad e7 2a ec de 49 24 e4 e0 9f ee df 47 54 82 93 4b 76 1c
f8 44 d3 79 a3 6f 58 5a 55 e2 98 a9 72 8c ff f1 68 cb ab 39 97 46 77 e6 0a dd 17
a0 23 95 f2 81 b7 7a 18 14 d1 e4 ca 7b c2 c3
v0=b[i]
v1=b[i+1]
delta=-239350328
a0 =-1640531527
a1 = 1013904242
a2 = -626627285
a3 = 2027808484
a4 = 387276957
a5 =-239350328
for (int i = 0; i < 10485760; i+=8)
{
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a0);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a0);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a1);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a1);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a2);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a2);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a3);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a3);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a4);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a4);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a5-1013904242);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a5-1013904242);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a5+1640531527);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a5+1640531527);
v0+=((v1<<4)-1556008596)^((v1>>5)-939442524)^(v1+a5);
v1+=((v0<<4)+1013904242)^((v0>>5)+338241895)^(v0+a5);
a0 += delta;
a1 += delta;
a2 += delta;
a3 += delta;
a4 += delta;
a5 += delta;
}
gift5:48 ef 7b b4 4e 3a 24 5f 4c 95 af 03 0e 1f b1 39 55 37 99 41 6a 78 1e a1 cb
1b 57 28 c0 84 d5 fe 30 51 7a 11 57 12 ea 24 c4 fb 90 6a f0 5e 8e 25 51 01 98 28
95 5a 5e 84 66 1d 26 65 dc ae 94 db f3 0a 02 21 c9 68 2d 7f 82 5c 99 94 34 3b ee
d1 79 b5 c3 68 b5 82 25 a0 f6 cf bb cd 5a 19 e8 b3 76 0f 41 64 bd 2e fa b9 00 df
5a 3c b1 02 69 08 40 0e 52 ea 97 0b 16 51 55 dc 81 a4 dd 57 b0 15 10 4d e1 d4 42
6c d2 25 e1 70 a0 37 b6 2b c8 45 d5 52 14 53 00 6f a5 b4 56 7f 64 ba 41 66 2b 94
7a ca 0d cc 00 cf a4 9d b9 58 a4 07 24 8e 9d ff 12 6c 88 06 af 24 2f 3d ca f4 a6
49 93 0e db 65 ec 5c 8d 43 97 07 6c 34 ca d1 26 66 cc 87 5b b9 92 3d 1e d8 a7 12
b6 d7 fa 44 8a 72 a0 82 c0 7f a6 00 f9 23 61 a3 91 b7 7b 4e c2 5f ae 9c b6 e9 20
e8 6e 54 1b 35 09 bb 21 31 12 fd 8e c1 c5 5f
for(int i=0;i<len;i++)
b[i]^=i;
final:
48 EE 79 B7 4A 3F 22 58 44 9C A5 08 02 12 BF 36
45 26 8B 52 7E 6D 08 B6 D3 02 4D 33 DC 99 CB E1
10 70 58 32 73 37 CC 03 EC D2 BA 41 DC 73 A0 0A
61 30 AA 1B A1 6F 68 B3 5E 24 1C 5E E0 93 AA E4
B3 4B 40 62 8D 2D 6B 38 CA 15 D3 DF 78 76 A0 9E
29 E4 91 3B E1 D7 73 F7 AE 96 E1 96 06 44 B6 EC
16 6E 23 07 D9 4B 9C DE 68 B6 30 57 DD 6F 07 67
30 7F 20 99 E3 7E 60 26 2D A5 FB DF A1 2A CE 6A
90 CC 63 57 C6 E9 54 A2 69 F9 2A BC 3A A6 46 CA
45 C3 86 C0 94 FA 33 23 CE E6 FE 21 DD FB B5 0B
DA 6B AF 6F A4 6A 02 3A 11 F1 0E AC 88 23 33 50
A2 DD 3A B5 1B 91 99 8A 72 4D 1C F2 2F B3 65 DA
2C 9D 4F 80 53 C2 AA F3 02 18 EC AD 00 4A 95 76
42 EC CC 0B 73 C7 60 00 22 9D 50 A9 7C 5F 1E A0
46 E1 1B C0 85 46 77 50 93 A7 28 B4 42 71 58 06
D0 19 9C A7 EF C0 FF 4C D9 C8 E8 06 72 3C 3B A0
那么我们可以写出对应的解密脚本,这里由于不会cuda的语法最后只能学习使用多线程进行flag的解密
脚本如下:
/**
 * One unit of decryption work: a contiguous slice of the ciphertext together
 * with the information needed to place it in the overall buffer.
 */
typedef struct DecryptTask {
    /** Source bytes of this slice. */
    uint8_t *input;
    /** Destination buffer for this slice. */
    uint8_t *output;
    /** Byte offset of this slice within the whole buffer (global offset). */
    size_t block_offset;
    /** Actual number of bytes in this slice. */
    size_t block_size;
} DecryptTask;
/*
 * Forward substitution table used by the kernel's first stage (the values
 * are the AES S-box). Sixteen entries per row so that the index of an entry
 * is row * 16 + column.
 */
const uint8_t T[256] =
{
     99, 124, 119, 123, 242, 107, 111, 197,  48,   1, 103,  43, 254, 215, 171, 118,
    202, 130, 201, 125, 250,  89,  71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
    183, 253, 147,  38,  54,  63, 247, 204,  52, 165, 229, 241, 113, 216,  49,  21,
      4, 199,  35, 195,  24, 150,   5, 154,   7,  18, 128, 226, 235,  39, 178, 117,
      9, 131,  44,  26,  27, 110,  90, 160,  82,  59, 214, 179,  41, 227,  47, 132,
     83, 209,   0, 237,  32, 252, 177,  91, 106, 203, 190,  57,  74,  76,  88, 207,
    208, 239, 170, 251,  67,  77,  51, 133,  69, 249,   2, 127,  80,  60, 159, 168,
     81, 163,  64, 143, 146, 157,  56, 245, 188, 182, 218,  33,  16, 255, 243, 210,
    205,  12,  19, 236,  95, 151,  68,  23, 196, 167, 126,  61, 100,  93,  25, 115,
     96, 129,  79, 220,  34,  42, 144, 136,  70, 238, 184,  20, 222,  94,  11, 219,
    224,  50,  58,  10,  73,   6,  36,  92, 194, 211, 172,  98, 145, 149, 228, 121,
    231, 200,  55, 109, 141, 213,  78, 169, 108,  86, 244, 234, 101, 122, 174,   8,
    186, 120,  37,  46,  28, 166, 180, 198, 232, 221, 116,  31,  75, 189, 139, 138,
    112,  62, 181, 102,  72,   3, 246,  14,  97,  53,  87, 185, 134, 193,  29, 158,
    225, 248, 152,  17, 105, 217, 142, 148, 155,  30, 135, 233, 206,  85,  40, 223,
    140, 161, 137,  13, 191, 230,  66, 104,  65, 153,  45,  15, 176,  84, 187,  22
};
/*
 * Inverse substitution table (the values are the AES inverse S-box); it maps
 * each output of the forward table back to its input. Sixteen entries per
 * row so that the index of an entry is row * 16 + column.
 */
const uint8_t RT[256] =
{
     82,   9, 106, 213,  48,  54, 165,  56, 191,  64, 163, 158, 129, 243, 215, 251,
    124, 227,  57, 130, 155,  47, 255, 135,  52, 142,  67,  68, 196, 222, 233, 203,
     84, 123, 148,  50, 166, 194,  35,  61, 238,  76, 149,  11,  66, 250, 195,  78,
      8,  46, 161, 102,  40, 217,  36, 178, 118,  91, 162,  73, 109, 139, 209,  37,
    114, 248, 246, 100, 134, 104, 152,  22, 212, 164,  92, 204,  93, 101, 182, 146,
    108, 112,  72,  80, 253, 237, 185, 218,  94,  21,  70,  87, 167, 141, 157, 132,
    144, 216, 171,   0, 140, 188, 211,  10, 247, 228,  88,   5, 184, 179,  69,   6,
    208,  44,  30, 143, 202,  63,  15,   2, 193, 175, 189,   3,   1,  19, 138, 107,
     58, 145,  17,  65,  79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
    150, 172, 116,  34, 231, 173,  53, 133, 226, 249,  55, 232,  28, 117, 223, 110,
     71, 241,  26, 113,  29,  41, 197, 137, 111, 183,  98,  14, 170,  24, 190,  27,
    252,  86,  62,  75, 198, 210, 121,  32, 154, 219, 192, 254, 120, 205,  90, 244,
     31, 221, 168,  51, 136,   7, 199,  49, 177,  18,  16,  89,  39, 128, 236,  95,
     96,  81, 127, 169,  25, 181,  74,  13,  45, 229, 122, 159, 147, 201, 156, 239,
    160, 224,  59,  77, 174,  42, 245, 176, 200, 235, 187,  60, 131,  83, 153,  97,
     23,  43,   4, 126, 186, 119, 214,  38, 225, 105,  20,  99,  85,  33,  12, 125
};
/*
 * Inverts the kernel's TEA stage for one 64-bit block, in place.
 *
 * The kernel runs 10485760 TEA rounds (unrolled eight per loop iteration)
 * with the running sum taking the values 1*delta, 2*delta, ... in order,
 * where delta = 0x9E3779B9 (-1640531527 as a signed 32-bit value):
 *     v0 += ((v1 << 4) - 1556008596) ^ ((v1 >> 5) - 939442524) ^ (v1 + sum);
 *     v1 += ((v0 << 4) + 1013904242) ^ ((v0 >> 5) + 338241895) ^ (v0 + sum);
 * Decryption therefore starts from sum = 10485760 * delta and undoes the two
 * half-rounds in the opposite order, stepping the sum back by delta after
 * each round. All arithmetic is modulo 2^32.
 *
 * a: pointer to the first 32-bit word of the block (v0); read and overwritten.
 * b: pointer to the second 32-bit word of the block (v1); read and overwritten.
 */
void tea_decrypt(uint32_t *a, uint32_t *b)
{
    enum { TEA_ROUNDS = 10485760 };

    const uint32_t delta = 0x9E3779B9u;
    /* Key words exactly as they appear as immediates in the kernel. */
    const uint32_t key0 = (uint32_t)-1556008596;
    const uint32_t key1 = (uint32_t)-939442524;
    const uint32_t key2 = (uint32_t)1013904242;
    const uint32_t key3 = (uint32_t)338241895;

    uint32_t v0 = *a;
    uint32_t v1 = *b;
    /* Sum used by the final encryption round; unsigned wrap-around is intended. */
    uint32_t sum = delta * (uint32_t)TEA_ROUNDS;

    for (uint32_t round = 0; round < (uint32_t)TEA_ROUNDS; round++) {
        /* v1 was updated last during encryption, so it is restored first. */
        v1 -= ((v0 << 4) + key2) ^ ((v0 >> 5) + key3) ^ (v0 + sum);
        v0 -= ((v1 << 4) + key0) ^ ((v1 >> 5) + key1) ^ (v1 + sum);
        sum -= delta;
    }

    *a = v0;
    *b = v1;
}
/*
 * Decrypt one block described by `task`: copy task->input, undo the
 * transformation layers in the order written below, and copy block_size
 * bytes of the result to task->output.
 *
 * NOTE(review): this listing is damaged and will not compile as shown.
 *  - The return type and the name are fused ("voidprocess_block").
 *  - Text is missing between "v1[0] = v1[v1_len-1];" and the fragment
 *    "i--) {": the rest of that swap and at least one loop header were lost,
 *    possibly a whole transformation step (the labels jump from Gift4 to
 *    Gift1). Restore it from the original source before use.
 *  - Neither malloc result is checked.
 */
voidprocess_block(DecryptTask *task){
// Work on a private copy of the input block.
uint8_t *v = (uint8_t *)malloc(task->block_size);
memcpy(v, task->input, task->block_size);
// Gift5: XOR each byte with the low 8 bits of its global byte offset
// (block_offset + index within the block).
for (int i = 0; i < task->block_size; i++) {
v[i] ^= ( (task->block_offset + i) & 0xFF );
}
// Pack the bytes into uint32_t words, least significant byte first
// (little-endian). Trailing bytes beyond a multiple of 4 are dropped.
int num_words = task->block_size / 4;
uint32_t *words = (uint32_t *)malloc(num_words * sizeof(uint32_t));
for (int i = 0; i < num_words; i++) {
words[i] = (v[i*4+3] << 24) | (v[i*4+2] << 16) | (v[i*4+1] << 8) |
v[i*4];
}
free(v);
// TEA decryption, two words (one 64-bit block) at a time.
// NOTE(review): if num_words is odd, the last call reads words[i+1] one
// element past the allocation.
for (int i = 0; i < num_words; i += 2) {
tea_decrypt(&words[i], &words[i+1]);
}
// View the word array as bytes again.
// NOTE(review): this uses the host's byte order, so it matches the
// little-endian packing above only on a little-endian machine.
uint8_t *v1 = (uint8_t *)words;
int v1_len = num_words * 4;
// Gift4: swap the byte pairs (1,2), (3,4), ... and then the first byte
// with the last byte.
for (int i = 1; i < v1_len - 1; i += 2) {
uint8_t tmp = v1[i];
v1[i] = v1[i+1];
v1[i+1] = tmp;
}
if (v1_len >= 1) {
uint8_t tmp = v1[0];
v1[0] = v1[v1_len-1];
// NOTE(review): source text is missing here (see the header comment). What
// follows is the tail of a loop that counts i downwards; each byte is XORed
// with (its successor ^ 0xAC), the successor index wrapping around to 0.
i--) {
int next_idx = (i + 1) % v1_len;
v1[i] ^= (v1[next_idx] ^ 0xAC);
}
// Gift1 (original logic kept as-is): for every byte, 5 passes of 10485760
// steps with j counting down; each step XORs in the low byte of j, swaps
// the two nibbles, and substitutes the result through RT.
for (int k = 0; k < v1_len; k++) {
for (int i = 0; i < 5; i++) {
for (int j = 10485759; j >= 0; j--) {
v1[k] ^= (j & 0xFF);
v1[k] = ((v1[k] >> 4) | (v1[k] << 4)) & 0xFF;
v1[k] = RT[v1[k]];
}
}
}
// Final processing (uses the global offset): swap the nibbles of each byte,
// then XOR with the low 8 bits of (global offset * 73 + 0xAC).
for (int i = 0; i < v1_len; i++) {
v1[i] = ((v1[i] >> 4) | (v1[i] << 4)) & 0xFF;
v1[i] ^= ( ( (task->block_offset + i) * 73 + 0xAC ) & 0xFF );
}
// NOTE(review): copies block_size bytes, but v1 holds only num_words * 4;
// when block_size is not a multiple of 4 this reads past the buffer.
memcpy(task->output, v1, task->block_size);
free(words);
}
intmain(){
FILE *fin = fopen("flag_enc", "rb");
if (!fin) {
perror("Failed to open input file");
return 1;
}
fseek(fin, 0, SEEK_END);
long file_size = ftell(fin);
fseek(fin, 0, SEEK_SET);
// 计算分块信息
size_t total_blocks = (file_size + BLOCK_SIZE - 1) / BLOCK_SIZE;
DecryptTask *tasks = (DecryptTask *)malloc(total_blocks *
sizeof(DecryptTask));
uint8_t *file_buffer
解完得到flag _dec,010发现是一张png图片
得到flag:
// 初始化任务队列
for (size_t i = 0; i < total_blocks; i++) {
tasks[i].input = &file_buffer[i * BLOCK_SIZE];
tasks[i].output = &output_buffer[i * BLOCK_SIZE];
tasks[i].block_offset = i * BLOCK_SIZE;
tasks[i].block_size = (i == total_blocks - 1) ? (file_size % BLOCK_SIZE)
: BLOCK_SIZE;
if (i == total_blocks - 1 && file_size % BLOCK_SIZE == 0) {
tasks[i].block_size = BLOCK_SIZE;
}
}
// 并行处理所有块
for (size_t i = 0; i < total_blocks; i++) {
process_block(&tasks[i]);
}
// 顺序写入结果
FILE *fout = fopen("flag_dec", "wb");
if (!fout) {
perror("Failed to open output file");
free(file_buffer);
free(output_buffer);
free(tasks);
return 1;
}
for (size_t i = 0; i < total_blocks; i++) {
fwrite(tasks[i].output, 1, tasks[i].block_size, fout);
}
fclose(fout);
free(file_buffer);
free(output_buffer);
free(tasks);
return 0;
}
aliyunctf{CUDA@PTX@EASY@}
原文始发于微信公众号(SKSEC):【表哥有话说 第113期】浅析CUDA逆向与PTX汇编
免责声明:文章中涉及的程序(方法)可能带有攻击性,仅供安全研究与教学之用,读者将其信息做其他用途,由读者承担全部法律及连带责任,本站不承担任何法律及连带责任;如有问题可邮件联系(建议使用企业邮箱或有效邮箱,避免邮件被拦截,联系方式见首页),望知悉。
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论