所以,对于进行加壳和脱壳操作的安全研究人员和安全工程师来说,熟悉PE结构是必要的,因为它提供了深入理解和操作可执行文件的基础。这样才能更好地进行反病毒、反调试,以及免杀应对等工作,确保软件和系统的安全。
0x00 前置基础
-
拥有NASM/C++基础
-
了解数据类型和字节序
-
掌握动/静态逆向分析工具
小复习:
BYTE类型 占2个十六进制位, 1字节
WORD类型 占4个十六进制位, 2字节
DWORD类型 占8个十六进制位, 4字节
内存中的小端序:
人类读取顺序:12 34 56 78;机器(小端序)存储顺序:78 56 34 12
0x01 什么是PE结构?
以下为010 Editor中打开的所有PE结构截图:
存储了早期的MS-DOS可执行文件的信息,用于向后兼容
包含WINDOWS PE的主要信息内有三个部分构成:
0x02 PE格式详解
一、DOS头
// Legacy MS-DOS header found at the very start of a PE file.
// For PE parsing the two fields that matter are e_magic (the "MZ" marker,
// bytes 4D 5A) and e_lfanew (file offset of the NT headers).
typedef struct _IMAGE_DOS_HEADER { // DOS .EXE header
WORD e_magic; // Magic number
WORD e_cblp; // Bytes on last page of file
WORD e_cp; // Pages in file
WORD e_crlc; // Relocations
WORD e_cparhdr; // Size of header in paragraphs
WORD e_minalloc; // Minimum extra paragraphs needed
WORD e_maxalloc; // Maximum extra paragraphs needed
WORD e_ss; // Initial (relative) SS value
WORD e_sp; // Initial SP value
WORD e_csum; // Checksum
WORD e_ip; // Initial IP value
WORD e_cs; // Initial (relative) CS value
WORD e_lfarlc; // File address of relocation table
WORD e_ovno; // Overlay number
WORD e_res[4]; // Reserved words
WORD e_oemid; // OEM identifier (for e_oeminfo)
WORD e_oeminfo; // OEM information; e_oemid specific
WORD e_res2[10]; // Reserved words
LONG e_lfanew; // File address of new exe header
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;
DOS头重点字段
WORD e_magic: 用于标识该文件是否为合法的可执行文件。其值是一个常数为4D 5A,在010Editor中打开查看ASCII字符为MZ。
MZ标志用于表示可执行文件的起始位置。当时的可执行文件格式以此标志开头,以标识它是可执行文件。后来,随着Windows操作系统的发展,微软引入了新的PE文件格式作为替代,但保留了MZ标志作为文件的有效性验证。在Windows中,MZ标志就是PE文件的前两个字节。
Tips: MZ标志的确是来自Mark Zbikowski的名字的前两个字母,但并不是纪念他,而是作为标识DOS可执行文件格式的一种方式。
LONG e_lfanew:指示PE文件头(PE header)的偏移量,即NT头相对文件起始地址的偏移。例如该字段的值为0x000000E0(在文件中按小端序存储为E0 00 00 00),那么文件偏移0xE0处就是NT头的起始位置。
二、NT头
咱们可以从C++中的winnt.h头文件中找到具体的NT头信息,跟进去搜索_IMAGE_NT_HEADERS即可看到NT头的参数
// NT headers (32-bit variant): the PE signature followed by the file header
// and the optional header.
typedef struct _IMAGE_NT_HEADERS {
DWORD Signature; // PE signature
IMAGE_FILE_HEADER FileHeader; // file header
IMAGE_OPTIONAL_HEADER32 OptionalHeader; // optional header
} IMAGE_NT_HEADERS32, *PIMAGE_NT_HEADERS32;
以下为010Editor中解析NT头的截图:
1.PE签名
DWORD Signature:其值是一个常数,为50 45 00 00,在010Editor中打开查看ASCII字符为PE..(即"PE"后跟两个00字节)
2.文件头
跟进IMAGE_FILE_HEADER便可看到文件头参数
// File header: general properties of the image.
typedef struct _IMAGE_FILE_HEADER {
WORD Machine; // target CPU architecture, one of the IMAGE_FILE_MACHINE_* values
WORD NumberOfSections; // number of entries in the section header table
DWORD TimeDateStamp; // build time stamp; not guaranteed to be the real creation time
DWORD PointerToSymbolTable; // presumably the file offset of the symbol table - confirm against the PE spec
DWORD NumberOfSymbols; // presumably the number of symbol table entries - confirm against the PE spec
WORD SizeOfOptionalHeader; // size in bytes of the optional header that follows
WORD Characteristics; // bit flags, see the IMAGE_FILE_* characteristic values
} IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER;
以下为010Editor中打开的文件头截图:
文件头重点字段
WORD Machine:用于标识当前程序所面向的CPU架构。从上面010Editor解析文件头的截图中可以看到该值为0x014c,在下面的对应表中查到0x014c对应Intel 386;386是x86指令集架构的微处理器,因此可以得出该程序是32位的。
// Size in bytes of IMAGE_FILE_HEADER, followed by the bit flags that may be
// combined in IMAGE_FILE_HEADER.Characteristics.
#define IMAGE_SIZEOF_FILE_HEADER 20
#define IMAGE_FILE_RELOCS_STRIPPED 0x0001 // Relocation info stripped from file.
#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 // File is executable (i.e. no unresolved external references).
#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 // Line numbers stripped from file.
#define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 // Local symbols stripped from file.
#define IMAGE_FILE_AGGRESIVE_WS_TRIM 0x0010 // Aggressively trim working set
#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 // App can handle >2gb addresses
#define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 // Bytes of machine word are reversed.
#define IMAGE_FILE_32BIT_MACHINE 0x0100 // 32 bit word machine.
#define IMAGE_FILE_DEBUG_STRIPPED 0x0200 // Debugging info stripped from file in .DBG file
#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 // If Image is on removable media, copy and run from the swap file.
#define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 // If Image is on Net, copy and run from the swap file.
#define IMAGE_FILE_SYSTEM 0x1000 // System File.
#define IMAGE_FILE_DLL 0x2000 // File is a DLL.
#define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 // File should only be run on a UP machine
#define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 // Bytes of machine word are reversed.
// Values of IMAGE_FILE_HEADER.Machine identifying the target CPU architecture.
#define IMAGE_FILE_MACHINE_UNKNOWN 0
#define IMAGE_FILE_MACHINE_TARGET_HOST 0x0001 // Useful for indicating we want to interact with the host and not a WoW guest.
#define IMAGE_FILE_MACHINE_I386 0x014c // Intel 386.
#define IMAGE_FILE_MACHINE_R3000 0x0162 // MIPS little-endian, 0x160 big-endian
#define IMAGE_FILE_MACHINE_R4000 0x0166 // MIPS little-endian
#define IMAGE_FILE_MACHINE_R10000 0x0168 // MIPS little-endian
#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 // MIPS little-endian WCE v2
#define IMAGE_FILE_MACHINE_ALPHA 0x0184 // Alpha_AXP
#define IMAGE_FILE_MACHINE_SH3 0x01a2 // SH3 little-endian
#define IMAGE_FILE_MACHINE_SH3DSP 0x01a3
#define IMAGE_FILE_MACHINE_SH3E 0x01a4 // SH3E little-endian
#define IMAGE_FILE_MACHINE_SH4 0x01a6 // SH4 little-endian
#define IMAGE_FILE_MACHINE_SH5 0x01a8 // SH5
#define IMAGE_FILE_MACHINE_ARM 0x01c0 // ARM Little-Endian
#define IMAGE_FILE_MACHINE_THUMB 0x01c2 // ARM Thumb/Thumb-2 Little-Endian
#define IMAGE_FILE_MACHINE_ARMNT 0x01c4 // ARM Thumb-2 Little-Endian
#define IMAGE_FILE_MACHINE_AM33 0x01d3
#define IMAGE_FILE_MACHINE_POWERPC 0x01F0 // IBM PowerPC Little-Endian
#define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1
#define IMAGE_FILE_MACHINE_IA64 0x0200 // Intel 64
#define IMAGE_FILE_MACHINE_MIPS16 0x0266 // MIPS
#define IMAGE_FILE_MACHINE_ALPHA64 0x0284 // ALPHA64
#define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 // MIPS
#define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 // MIPS
#define IMAGE_FILE_MACHINE_AXP64 IMAGE_FILE_MACHINE_ALPHA64
#define IMAGE_FILE_MACHINE_TRICORE 0x0520 // Infineon
#define IMAGE_FILE_MACHINE_CEF 0x0CEF
#define IMAGE_FILE_MACHINE_EBC 0x0EBC // EFI Byte Code
#define IMAGE_FILE_MACHINE_AMD64 0x8664 // AMD64 (K8)
#define IMAGE_FILE_MACHINE_M32R 0x9041 // M32R little-endian
#define IMAGE_FILE_MACHINE_ARM64 0xAA64 // ARM64 Little-Endian
#define IMAGE_FILE_MACHINE_CEE 0xC0EE
Tips: TimeDateStamp字段不仅受到编译器或链接器的影响,还受到其他因素 (如构建系统或源代码管理工具的设置)的影响。 因此,它可能不一定准确反映文件的实际创建时间
3.可选头
跟进IMAGE_OPTIONAL_HEADER便可看到可选头参数(这里我跟进的是32位的)
以下是在010Editor中打开的可选头:
Optional header format.
//
typedef struct _IMAGE_OPTIONAL_HEADER {
//
Standard fields.
//
WORD Magic;
BYTE MajorLinkerVersion;
BYTE MinorLinkerVersion;
DWORD SizeOfCode;
DWORD SizeOfInitializedData;
DWORD SizeOfUninitializedData;
DWORD AddressOfEntryPoint;
DWORD BaseOfCode;
DWORD BaseOfData;
//
NT additional fields.
//
DWORD ImageBase;
DWORD SectionAlignment;
DWORD FileAlignment;
WORD MajorOperatingSystemVersion;
WORD MinorOperatingSystemVersion;
WORD MajorImageVersion;
WORD MinorImageVersion;
WORD MajorSubsystemVersion;
WORD MinorSubsystemVersion;
DWORD Win32VersionValue;
DWORD SizeOfImage;
DWORD SizeOfHeaders;
DWORD CheckSum;
WORD Subsystem;
WORD DllCharacteristics;
DWORD SizeOfStackReserve;
DWORD SizeOfStackCommit;
DWORD SizeOfHeapReserve;
DWORD SizeOfHeapCommit;
DWORD LoaderFlags;
DWORD NumberOfRvaAndSizes;
IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
IMAGE_OPTIONAL_HEADER32, *PIMAGE_OPTIONAL_HEADER32;
可选头重点字段
WORD Magic:一个WORD类型2字节的字段,表示镜像的状态。如010Editor中解析可选头的截图中值为01 0B, 那么0x10B意味这个是一个32位镜像文件。
0x10B: 32位镜像文件
0x107: ROM镜像文件
0x20B: 64位镜像文件
DWORD SizeOfCode:【代码段的大小】一般表示.text段的总大小(实际的有功能的代码占的大小)。
DWORD SizeOfInitializedData:【已初始化数据段大小】一般表示.data等已初始化数据段的总大小(存放已初始化的全局变量和静态变量)。
DWORD SizeOfUninitializedData:【未初始化数据段大小】一般表示.bss段的总大小(存放未初始化的全局变量和静态变量)。
DWORD AddressOfEntryPoint:【入口点地址】它指示了程序的执行起始点。当可执行文件加载到内存中时,操作系统会从该位置开始执行代码(注意:AddressOfEntryPoint是入口点相对于ImageBase的偏移量(RVA),而不是绝对地址;实际入口地址 = ImageBase + AddressOfEntryPoint)。
DWORD BaseOfCode:【代码段基址】表示.text代码段在内存中的起始地址(也就是写的代码第一行的位置)。
DWORD BaseOfData:【数据段基址】用于表示数据段.data在内存中的基地址。
DWORD ImageBase:【镜像基址】 它是可执行文件(PE)加载到内存中时的起始地址。它表示可执行文件在内存中的基地址,也就是加载时的基础地址。所有在可执行文件中定义的相对地址都是相对于ImageBase的偏移量。
DWORD SectionAlignment: 【区段对齐】指示可执行文件中各个区段在内存中的对齐方式。
DWORD FileAlignment:【文件对齐】用来对齐镜像文件的节中的原始数据的对齐因子(以字节计)。它应该是介于512和64K之间的2的幂(包括这两个边界值)。默认是512。如果SectionAlignment小于相应系统的页面大小,那么FileAlignment必须与SectionAlignment相等。
DWORD SizeOfImage:【PE在内存中的总大小】当镜像被加载进内存时的大小,包括所有的文件头。向上舍入为SectionAlignment的倍数。
DWORD SizeOfHeaders:【头大小】PE中所有头的总大小 。
DWORD CheckSum:【校验和】现在除了游戏之外几乎很少使用了。
WORD Subsystem:【子系统】
子系统表:
值 | 描述 |
---|---|
0 | 未知子系统 |
1 | 设备驱动程序和Native Windows进程 |
2 | Windows图形用户界面(GUI)子系统(一般程序) |
3 | Windows字符模式(CUI)子系统(从命令提示符启动的) |
7 | Posix字符模式子系统 |
9 | Windows CE |
10 | 可扩展固件接口(EFI)应用程序 |
11 | 带引导服务的EFI驱动程序 |
12 | 带运行时服务的EFI驱动程序 |
13 | EFI ROM镜像 |
14 | XBOX |
WORD DllCharacteristics:DLL标识,从010Editor中打开的可选头来看该字段的值为85 40
// DllCharacteristics Entries
// IMAGE_LIBRARY_PROCESS_INIT 0x0001 // Reserved.
// IMAGE_LIBRARY_PROCESS_TERM 0x0002 // Reserved.
// IMAGE_LIBRARY_THREAD_INIT 0x0004 // Reserved.
// IMAGE_LIBRARY_THREAD_TERM 0x0008 // Reserved.
以下是关闭aslr的OD截图
以下是开启aslr的OD截图
struct IMAGE_DATA_DIRECTORY_ARRAY DataDirArray //指向的是一个指针,里面的内容指向的是描述表
struct IMAGE_DATA_DIRECTORY Import //里面包含了导入表的起始地址和大小。
DWORD VirtualAddress //指向导入表的起始地址在内存中的虚拟地址。
DWORD Size //导入表的大小,以字节为单位。
struct IMAGE_IMPORT_DESCRIPTOR ImportDescriptor[0] //里面有dll和函数的地址
三、节头
咱们可以从C++中的winnt.h库中找到具体节头信息,跟进去搜索IMAGE_SECTION_HEADER即可看到节头的参数
typedef struct _IMAGE_SECTION_HEADER {
BYTE Name[IMAGE_SIZEOF_SHORT_NAME];
union {
DWORD PhysicalAddress;
DWORD VirtualSize;
Misc;
DWORD VirtualAddress;
DWORD SizeOfRawData;
DWORD PointerToRawData;
DWORD PointerToRelocations;
DWORD PointerToLinenumbers;
WORD NumberOfRelocations;
WORD NumberOfLinenumbers;
DWORD Characteristics;
IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER;
0x03 PE的运行过程
// Maps the raw PE file held in PE_data into memory and returns the address of
// its entry point, or NULL when the image memory cannot be allocated.
void* load_PE(char* PE_data);
// Resolves the imports of the mapped image and writes the function addresses
// into its import address table.
void fix_iat(char*, IMAGE_NT_HEADERS*);
// Applies base relocations when the image was not mapped at its ImageBase.
void fix_base_reloc(char* p_image_base, IMAGE_NT_HEADERS* p_NT_headers);
/*
 * Entry point of the demo loader.
 *
 * Reads the PE file named by argv[1] completely into memory, maps it with
 * load_PE() and then jumps to the entry point of the mapped image.
 *
 * Returns 1 on a usage, I/O, allocation or load error; otherwise 0 once the
 * loaded program's entry point returns.
 */
int main(int argc, char const* argv[]) {
    if (argc < 2) {
        printf("missing path argument\n");
        return 1;
    }
    FILE* exe_file = fopen(argv[1], "rb");
    if (!exe_file) {
        printf("error opening file\n");
        return 1;
    }
    // Get the file size: move to the end, read the position, then rewind.
    long int file_size = -1;
    if (fseek(exe_file, 0L, SEEK_END) == 0) {
        file_size = ftell(exe_file);
    }
    if (file_size < 0 || fseek(exe_file, 0L, SEEK_SET) != 0) {
        printf("error reading file size\n");
        fclose(exe_file);
        return 1;
    }
    // allocate memory for the whole file
    char* exe_file_data = (char*)malloc((size_t)file_size + 1);
    if (exe_file_data == NULL) {
        printf("out of memory\n");
        fclose(exe_file);
        return 1;
    }
    // read the whole file; the handle is not needed afterwards
    size_t n_read = fread(exe_file_data, 1, (size_t)file_size, exe_file);
    fclose(exe_file);
    if (n_read != (size_t)file_size) {
        printf("reading error (%lu)\n", (unsigned long)n_read);
        free(exe_file_data);
        return 1;
    }
    // load the PE in memory
    printf("[+] Loading PE file\n");
    // The PE contains an import table; like a dictionary, it holds entries
    // together with the names they belong to.
    void* entry = load_PE(exe_file_data);
    // load_PE() copies everything it needs into its own mapping, so the raw
    // file buffer can be released before running the image.
    free(exe_file_data);
    if (entry == NULL) {
        printf("error loading PE file\n");
        return 1;
    }
    // call its entrypoint
    ((void (*)(void))entry)();
    return 0;
}
void* load_PE(char* PE_data) {
IMAGE_DOS_HEADER* p_DOS_header = (IMAGE_DOS_HEADER*)PE_data;
IMAGE_NT_HEADERS* p_NT_headers = (IMAGE_NT_HEADERS*)(PE_data + p_DOS_header -
> e_lfanew);
// extract information from PE header
DWORD size_of_image = p_NT_headers->OptionalHeader.SizeOfImage;
DWORD entry_point_RVA = p_NT_headers->OptionalHeader.AddressOfEntryPoint;
DWORD size_of_headers = p_NT_headers->OptionalHeader.SizeOfHeaders;
// allocate memory
// https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi•virtualalloc
char* p_image_base = (char*)VirtualAlloc(NULL, size_of_image, MEM_RESERVE |
MEM_COMMIT, PAGE_READWRITE);
if (p_image_base == NULL) {
return NULL;
}
// copy PE headers in memory
memcpy(p_image_base, PE_data, size_of_headers);
// Section headers starts right after the IMAGE_NT_HEADERS struct, so we do some
pointer arithmetic - fu here.
IMAGE_SECTION_HEADER * sections = (IMAGE_SECTION_HEADER*)(p_NT_headers + 1);
for (int i = 0; i < p_NT_headers->FileHeader.NumberOfSections; i++) {
// calculate the VA we need to copy the content, from the RVA
// section[i].VirtualAddress is a RVA, mind it
char* dest = p_image_base + sections[i].VirtualAddress;
// check if there is Raw data to copy
if (sections[i].SizeOfRawData > 0) {
// We copy SizeOfRaw data bytes, from the offset PointerToRawData in the
file
memcpy(dest, PE_data + sections[i].PointerToRawData,
sections[i].SizeOfRawData);
}
else {
memset(dest, 0, sections[i].Misc.VirtualSize);
}
}
fix_iat(p_image_base, p_NT_headers);
fix_base_reloc(p_image_base, p_NT_headers);
// Set permission for the PE header to read only
DWORD oldProtect;
VirtualProtect(p_image_base, p_NT_headers->OptionalHeader.SizeOfHeaders,
PAGE_READONLY, &oldProtect);
for (int i = 0; i < p_NT_headers->FileHeader.NumberOfSections; ++i) {
char* dest = p_image_base + sections[i].VirtualAddress;
DWORD s_perm = sections[i].Characteristics;
DWORD v_perm = 0; // flags are not the same between virtal protect and the
section header
if (s_perm & IMAGE_SCN_MEM_EXECUTE) {
v_perm = (s_perm & IMAGE_SCN_MEM_WRITE) ? PAGE_EXECUTE_READWRITE :
PAGE_EXECUTE_READ;
}
else {
v_perm = (s_perm & IMAGE_SCN_MEM_WRITE) ? PAGE_READWRITE :
PAGE_READONLY;
}
VirtualProtect(dest, sections[i].Misc.VirtualSize, v_perm, &oldProtect);
}
return (void*)(p_image_base + entry_point_RVA);
}
/*
 * Resolves the imports of an image mapped at p_image_base.
 *
 * For every import descriptor the named DLL is loaded with LoadLibraryA and
 * each entry of its lookup table is resolved with GetProcAddress, either by
 * name or by ordinal; the resulting address is stored in the matching slot
 * of the import address table (IAT).
 *
 * Does nothing when the image has no import directory. Calls abort() when a
 * DLL or a function cannot be resolved.
 */
void fix_iat(char* p_image_base, IMAGE_NT_HEADERS* p_NT_headers) {
    IMAGE_DATA_DIRECTORY* data_directory =
        p_NT_headers->OptionalHeader.DataDirectory;

    DWORD import_rva =
        data_directory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress;
    if (import_rva == 0) {
        // no import directory: nothing to resolve
        return;
    }

    // load the address of the import descriptors array
    IMAGE_IMPORT_DESCRIPTOR* import_descriptors =
        (IMAGE_IMPORT_DESCRIPTOR*)(p_image_base + import_rva);

    // this array is null terminated
    for (int i = 0; import_descriptors[i].OriginalFirstThunk != 0; ++i) {
        // Get the name of the dll, and import it
        char* module_name = p_image_base + import_descriptors[i].Name;
        HMODULE import_module = LoadLibraryA(module_name);
        if (import_module == NULL) {
            printf("import module is null");
            abort();
        }

        // the lookup table points to function names or ordinals
        IMAGE_THUNK_DATA* lookup_table = (IMAGE_THUNK_DATA*)(
            p_image_base + import_descriptors[i].OriginalFirstThunk);
        // the address table is a copy of the lookup table at first, but we
        // put the addresses of the loaded functions inside => that's the IAT
        IMAGE_THUNK_DATA* address_table = (IMAGE_THUNK_DATA*)(
            p_image_base + import_descriptors[i].FirstThunk);

        // null terminated array, again (separate index: do not shadow i)
        for (int j = 0; lookup_table[j].u1.AddressOfData != 0; ++j) {
            void* function_handle = NULL;
            DWORD lookup_addr = lookup_table[j].u1.AddressOfData;

            if ((lookup_addr & IMAGE_ORDINAL_FLAG) == 0) {
                // import by name: get the IMAGE_IMPORT_BY_NAME struct
                IMAGE_IMPORT_BY_NAME* image_import =
                    (IMAGE_IMPORT_BY_NAME*)(p_image_base + lookup_addr);
                // this struct holds the ASCII function name
                char* funct_name = (char*)&(image_import->Name);
                function_handle =
                    (void*)GetProcAddress(import_module, funct_name);
            }
            else {
                // Import by ordinal. GetProcAddress only treats its second
                // argument as an ordinal when the high-order word is zero,
                // so strip the flag bit and keep the low 16 bits.
                function_handle = (void*)GetProcAddress(
                    import_module, (LPSTR)(size_t)(lookup_addr & 0xFFFF));
            }

            if (function_handle == NULL) {
                printf("function handle is null");
                abort();
            }

            // change the IAT, and put the function address inside.
            address_table[j].u1.Function = (DWORD)function_handle;
        }
    }
}
void fix_base_reloc(char* p_image_base, IMAGE_NT_HEADERS* p_NT_headers) {
IMAGE_DATA_DIRECTORY* data_directory = p_NT_headers -
> OptionalHeader.DataDirectory;
// this is how much we shifted the ImageBase
DWORD delta_VA_reloc = ((DWORD)p_image_base) - p_NT_headers -
> OptionalHeader.ImageBase;
// if there is a relocation table, and we actually shitfted the ImageBase
if (data_directory[IMAGE_DIRECTORY_ENTRY_BASERELOC].VirtualAddress != 0 &&
delta_VA_reloc != 0) {
// calculate the relocation table address
IMAGE_BASE_RELOCATION* p_reloc =
(IMAGE_BASE_RELOCATION*)(p_image_base +
data_directory[IMAGE_DIRECTORY_ENTRY_BASERELOC].VirtualAddress);
// once again, a null terminated array
while (p_reloc->VirtualAddress != 0) {
// how any relocation in this block
// ie the total size, minus the size of the "header", divided by 2
(those are words, so 2 bytes for each)
DWORD size = (p_reloc->SizeOfBlock - sizeof(IMAGE_BASE_RELOCATION)) / 2;
// the first relocation element in the block, right after the header
(using pointer arithmetic again)
WORD* fixups = (WORD*)(p_reloc + 1);
for (int i = 0; i < size; ++i) {
// type is the first 4 bits of the relocation word
int type = fixups[i] >> 12;
// offset is the last 12 bits
int offset = fixups[i] & 0x0fff;
// this is the address we are going to change
DWORD* change_addr = (DWORD*)(p_image_base + p_reloc->VirtualAddress
+ offset);
本文部分资料引用以下文章
PE结构详解:
https://blog.csdn.net/cs2626242/article/details/79391599
PE结构详解加壳脱壳必备知识:
https://blog.csdn.net/a59a59/article/details/103214936
原文始发于微信公众号(沃克学安全):windows逆向基础-PE文件结构详解(上)
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论