LLVM字符串加密学习笔记

admin 2023年7月27日10:05:14评论12 views字数 12230阅读40分46秒阅读模式

之前看chenx6大佬的博客学习了一下编写基础的LLVM Pass,但是那个有很明显的问题是,作者为了处理Function内部重复引用的多次解密的问题,特判了引用次数,如果存在多处对global string的引用是无法进行混淆的。


但是实际编程中很难避免在多处引用字符串,所以那个Pass只能混淆简单的代码。我后面根据评论区的说法对此优化了一下,改成在Function的EntryBasicBlock处解密,但是过不了llvm-dis,感觉是在alloca栈变量的时候出了问题,暂时不知道怎么解决,后面如果有能力的话再重新写一遍吧。


之后学习了一下pluto-obfuscator(https://github.com/bluesadi/Pluto-Obfuscator)项目,里面有一份GlobalEncryption.cpp,借此机会学习一下,顺便写一份New PassManager版本的。

pluto-obfuscator的全局变量加密对全局整数和数组变量进行了处理,采用的方式是编译期加密,运行期解密,就是在IR阶段对全局变量加密,然后对每个全局变量调用解密函数并添加到.ctors中,让程序在运行期进行全局构造的时候解密。

看雪有一个帖子(https://bbs.kanxue.com/thread-263107.htm)对几种加密方式总结得比较好,这里搬运一下。

LLVM字符串加密学习笔记

runOnModule


首先获取Module的LLVMContext,获取所有的全局变量,添加到GVs中。
    
INIT_CONTEXT(M);
vector<GlobalVariable *> GVs;
for (GlobalVariable &GV : M.getGlobalList()) {
GVs.push_back(&GV);
}

然后筛选出需要加密的全局变量,ObfuTimes是混淆次数,默认为1。
    
for (int i = 0; i < ObfuTimes; i++) {
for (GlobalVariable *GV : GVs) {
// 只对Integer和Array类型进行加密
if (!GV->getValueType()->isIntegerTy() &&
!GV->getValueType()->isArrayTy()) {
continue;
}
// 筛出".str"全局变量,LLVM IR的metadata同样也要保留
if (GV->hasInitializer() && GV->getInitializer() &&
(GV->getName().contains(".str") || !OnlyStr)
// Do not encrypt globals having a section named "llvm.metadata"
&& !GV->getSection().equals("llvm.metadata")) {
Constant *initializer = GV->getInitializer();
ConstantInt *intData = dyn_cast<ConstantInt>(initializer);
ConstantDataArray *arrData = dyn_cast<ConstantDataArray>(initializer);

然后分别处理数组和整数类型的全局变量,在这里直接进行加密,然后调用insertArrayDecryption或insertIntDecryption将解密函数添加到全局构造函数表中。
            
if (arrData) {
// 获取数组的长度和数组元素的大小
uint32_t eleSize = arrData->getElementByteSize();
uint32_t eleNum = arrData->getNumElements();
uint32_t arrLen = eleNum * eleSize;
char *data = const_cast<char *>(arrData->getRawDataValues().data());
char *dataCopy = new char[arrLen];
memcpy(dataCopy, data, arrLen);
// 生成密钥
uint64_t key = cryptoutils->get_uint64_t();
// A simple xor encryption
for (uint32_t i = 0; i < arrLen; i++) {
dataCopy[i] ^= ((char *)&key)[i % eleSize];
}
GV->setInitializer(ConstantDataArray::getRaw(
StringRef(dataCopy, arrLen), eleNum, arrData->getElementType()));
GV->setConstant(false);
insertArrayDecryption(M, {GV, key, eleNum});
} else if (intData) {
uint64_t key = cryptoutils->get_uint64_t();
ConstantInt *enc =
CONST(intData->getType(), key ^ intData->getZExtValue());
GV->setInitializer(enc);
GV->setConstant(false);
insertIntDecryption(M, {GV, key, 1LL});
}


insertArrayDecryption


因为在解密的方式上其实很像,这里只记录比较复杂的数组解密,这里用IRBuilder构造了一个for循环来实现解密,整数全局变量只要和密钥异或就行了,原理是一样的。

首先构造一个函数,返回值是void。

vector<Type *> args;
FunctionType *funcType =
FunctionType::get(Type::getVoidTy(M.getContext()), args, false);
string funcName = genHashedName(encGV.GV);
FunctionCallee callee = M.getOrInsertFunction(funcName, funcType);
Function *func = cast<Function>(callee.getCallee());

然后构造四个BasicBlock,用于实现for循环的几个阶段。

BasicBlock *entry = BasicBlock::Create(*CONTEXT, "entry", func);
// for(int i = 0; i < length; i++)
// 条件判断
BasicBlock *forCond = BasicBlock::Create(*CONTEXT, "for.cond", func);
// 循环体
BasicBlock *forBody = BasicBlock::Create(*CONTEXT, "for.body", func);
// i++
BasicBlock *forInc = BasicBlock::Create(*CONTEXT, "for.inc", func);
// 结束
BasicBlock *forEnd = BasicBlock::Create(*CONTEXT, "for.inc", func);

首先获取func的EntryBasicBlock,然后初始化循环变量。
    
IRBuilder<> builder(*CONTEXT);
builder.SetInsertPoint(entry);
AllocaInst *indexPtr = builder.CreateAlloca(TYPE_I32, CONST_I32(1), "i");
builder.CreateStore(CONST_I32(0), indexPtr);
builder.CreateBr(forCond);

forCond实现i < length的部分,如果i < length成立则跳转到循环体,如果不成立则跳出循环。

builder.SetInsertPoint(forCond);
LoadInst *index = builder.CreateLoad(TYPE_I32, indexPtr);
ICmpInst *cond =
cast<ICmpInst>(builder.CreateICmpSLT(index, CONST_I32(encGV.len)));
builder.CreateCondBr(cond, forBody, forEnd);

循环体内就是解密的过程了,其实就是和密钥的每一位进行异或,最后跳转到i++。

builder.SetInsertPoint(forBody);
Value *indexList[2] = {CONST_I32(0), index};
Value *ele = builder.CreateGEP(encGV.GV, ArrayRef<Value *>(indexList, 2));
ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
Type *eleTy = arrTy->getElementType();
Value *encEle =
builder.CreateXor(builder.CreateLoad(ele), CONST(eleTy, encGV.key));
builder.CreateStore(encEle, ele);
builder.CreateBr(forInc);

forInc实现的就是i++了,这里比较简单,最后跳转到forCond进行条件判断,这样就实现了循环。

builder.SetInsertPoint(forInc);
builder.CreateStore(builder.CreateAdd(index, CONST_I32(1)), indexPtr);
builder.CreateBr(forCond);

forEnd就是返回ret,最后再将函数写入.ctors中,实现运行期全局构造。

builder.SetInsertPoint(forEnd);
builder.CreateRetVoid();
appendToGlobalCtors(M, func, 0);

以下是我改写成New PassManager的GlobalsEncryption.cpp,加密部分没做修改,所有代码在libObfuscator/tree/pluto-enc(https://github.com/AimiP02/libObfuscator/tree/pluto-enc)。

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cstdint>
#include <iomanip>
#include <sstream>
#include <vector>

#include "CryptoUtils.h"

using namespace llvm;

// Describes one global whose initializer has been XOR-encrypted; passed by
// value to the InsertIntDecryption / InsertArrayDecryption generators.
struct EncryptedGV {
// The global variable that was encrypted.
GlobalVariable *GV;
// 64-bit XOR key obtained from cryptoutils for this global.
uint64_t key;
// Number of array elements to decrypt; callers pass 1 for a scalar integer.
uint32_t len;
};

namespace {

// -gvobfus-times=<N>: how many encryption rounds run() performs (default 1).
static cl::opt<int> ObfuTimes(
    "gvobfus-times",
    cl::desc("Run GlobalsEncryption pass <gvobfus-times> time(s)"),
    cl::init(1));

// -onlystr: when set, only globals whose name contains ".str" are encrypted.
static cl::opt<bool> OnlyStr(
    "onlystr",
    cl::desc("Encrypt string variable only"),
    cl::init(false));

/// New-pass-manager module pass that XOR-encrypts integer and array globals
/// and emits constructor functions which undo the encryption at start-up.
///
/// The pass object is handed to the pass manager by value, so it has no
/// virtual members and relies on the compiler-generated special members.
class GVObfuscator : public PassInfoMixin<GVObfuscator> {
public:
  /// Context of the module being processed; assigned at the start of run().
  LLVMContext *ctx = nullptr;

  /// Emit and register a constructor that decrypts the integer global in encGV.
  void InsertIntDecryption(Module &M, EncryptedGV encGV);

  /// Emit and register a constructor that decrypts the array global in encGV.
  void InsertArrayDecryption(Module &M, EncryptedGV encGV);

  /// Pass entry point: encrypt eligible globals of M.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

/// Build the name used for the decryption function of GV.
///
/// The name is the lowercase hexadecimal SHA-1 digest of the string
/// "<module name>_<id in hex>", where the id is whatever Module::getMDKindID
/// returns for GV's name.
std::string GenHashedName(GlobalVariable *GV) {
  Module &parent = *GV->getParent();
  std::string seed = formatv("{0}_{1:x-}", parent.getName(),
                             parent.getMDKindID(GV->getName()));

  SHA1 hasher;
  hasher.update(seed);
  StringRef rawDigest = hasher.final();

  // Two zero-padded hex digits per digest byte.
  std::stringstream hex;
  hex << std::hex;
  for (char byte : rawDigest) {
    hex << std::setw(2) << std::setfill('0')
        << static_cast<unsigned>(byte & 0xFF);
  }

  return hex.str();
}

/// Emit a `void()` function that XORs the integer global encGV.GV with
/// encGV.key in place, and append it to llvm.global_ctors with priority 0.
///
/// \param M      Module that owns the global and receives the new function.
/// \param encGV  The encrypted global and the key that was used on it.
void GVObfuscator::InsertIntDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  IntegerType *intTy = cast<IntegerType>(encGV.GV->getValueType());

  // Always create a fresh function. Looking the hashed name up with
  // getOrInsertFunction would hand back the previous round's decryptor when
  // the same global is encrypted again, so the new body would be unreachable
  // and the old function would be registered as a constructor twice.
  // Function::Create lets the symbol table pick a unique name on collision.
  FunctionType *funcType = FunctionType::get(Type::getVoidTy(C), false);
  Function *func = Function::Create(funcType, GlobalValue::ExternalLinkage,
                                    GenHashedName(encGV.GV), &M);

  IRBuilder<> builder(BasicBlock::Create(C, "entry", func));

  // Narrow (or widen) the 64-bit key explicitly to the global's bit width.
  Constant *keyConst = ConstantInt::get(
      C, APInt(64, encGV.key).zextOrTrunc(intTy->getBitWidth()));

  // Typed load: the pointee type is stated explicitly instead of being
  // derived from the pointer operand.
  Value *encrypted = builder.CreateLoad(intTy, encGV.GV);
  builder.CreateStore(builder.CreateXor(encrypted, keyConst), encGV.GV);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}

/// Emit a `void()` function containing the loop
///   for (i = 0; i < encGV.len; i++) GV[i] ^= key;
/// over the integer-element array global encGV.GV, and append it to
/// llvm.global_ctors with priority 0.
///
/// \param M      Module that owns the global and receives the new function.
/// \param encGV  The encrypted global, its key and its element count.
void GVObfuscator::InsertArrayDecryption(Module &M, EncryptedGV encGV) {
  LLVMContext &C = M.getContext();
  ArrayType *arrTy = cast<ArrayType>(encGV.GV->getValueType());
  // XOR is only defined on integers; cast<> asserts if the caller passes an
  // array with any other element type.
  IntegerType *eleTy = cast<IntegerType>(arrTy->getElementType());

  // Always create a fresh function. getOrInsertFunction would return the
  // previous round's decryptor when the same global is encrypted again,
  // leaving the new blocks unreachable and registering one ctor twice.
  FunctionType *funcType = FunctionType::get(Type::getVoidTy(C), false);
  Function *func = Function::Create(funcType, GlobalValue::ExternalLinkage,
                                    GenHashedName(encGV.GV), &M);

  BasicBlock *entry = BasicBlock::Create(C, "entry", func);
  BasicBlock *forCond = BasicBlock::Create(C, "for.cond", func);
  BasicBlock *forBody = BasicBlock::Create(C, "for.body", func);
  BasicBlock *forInc = BasicBlock::Create(C, "for.inc", func);
  BasicBlock *forEnd = BasicBlock::Create(C, "for.end", func);

  IRBuilder<> builder(entry);
  Type *Int32Ty = builder.getInt32Ty();

  // entry: i = 0
  AllocaInst *indexPtr =
      builder.CreateAlloca(Int32Ty, builder.getInt32(1), "i");
  builder.CreateStore(builder.getInt32(0), indexPtr);
  builder.CreateBr(forCond);

  // for.cond: i < len. len is unsigned, so compare unsigned.
  builder.SetInsertPoint(forCond);
  LoadInst *index = builder.CreateLoad(Int32Ty, indexPtr);
  Value *cond = builder.CreateICmpULT(index, builder.getInt32(encGV.len));
  builder.CreateCondBr(cond, forBody, forEnd);

  // for.body: GV[i] ^= key (key narrowed to the element width)
  builder.SetInsertPoint(forBody);
  Value *indices[] = {builder.getInt32(0), index};
  Value *elePtr = builder.CreateGEP(arrTy, encGV.GV, indices);
  Constant *keyConst = ConstantInt::get(
      C, APInt(64, encGV.key).zextOrTrunc(eleTy->getBitWidth()));
  Value *decrypted =
      builder.CreateXor(builder.CreateLoad(eleTy, elePtr), keyConst);
  builder.CreateStore(decrypted, elePtr);
  builder.CreateBr(forInc);

  // for.inc: i = i + 1
  builder.SetInsertPoint(forInc);
  builder.CreateStore(builder.CreateAdd(index, builder.getInt32(1)), indexPtr);
  builder.CreateBr(forCond);

  // for.end: return
  builder.SetInsertPoint(forEnd);
  builder.CreateRetVoid();

  appendToGlobalCtors(M, func, 0);
}

/// Pass entry point.
///
/// For each of ObfuTimes rounds, every eligible global is XOR-encrypted with
/// a fresh key from cryptoutils and a matching decryption constructor is
/// emitted through InsertArrayDecryption / InsertIntDecryption.
///
/// A global is eligible when it has an initializer, is not placed in the
/// "llvm.metadata" section, has a name containing ".str" if -onlystr is set,
/// and its initializer is either a ConstantInt or a ConstantDataArray with
/// integer elements.
///
/// \returns PreservedAnalyses::none() if any global was rewritten, otherwise
///          PreservedAnalyses::all().
PreservedAnalyses GVObfuscator::run(Module &M, ModuleAnalysisManager &MAM) {
  outs() << "Pass start...\n";

  ctx = &M.getContext();

  // Snapshot the candidates up front, leaving out the "llvm."-prefixed
  // intrinsic globals. In particular appendToGlobalCtors replaces
  // @llvm.global_ctors, so a pointer to the old one must not be kept around.
  std::vector<GlobalVariable *> GVs;
  for (GlobalVariable &GV : M.globals()) {
    if (GV.getName().take_front(5) == "llvm.") {
      continue;
    }
    GVs.push_back(&GV);
  }

  bool changed = false;
  for (int round = 0; round < ObfuTimes; round++) {
    outs() << "Current ObfuTimes: " << round << "\n";
    for (GlobalVariable *GV : GVs) {
      // Only integer and array globals are handled.
      Type *valueTy = GV->getValueType();
      if (!valueTy->isIntegerTy() && !valueTy->isArrayTy()) {
        continue;
      }
      if (!GV->hasInitializer()) {
        continue;
      }
      // With -onlystr, keep only the ".str" globals.
      if (OnlyStr && !GV->getName().contains(".str")) {
        continue;
      }
      // Globals placed in the "llvm.metadata" section must stay readable.
      if (GV->getSection() == "llvm.metadata") {
        continue;
      }

      Constant *initializer = GV->getInitializer();
      if (auto *arrayData = dyn_cast<ConstantDataArray>(initializer)) {
        // The generated decryptor XORs elements, which needs integer types.
        if (!arrayData->getElementType()->isIntegerTy()) {
          continue;
        }
        outs() << "Get global arraydata\n";
        uint64_t eleSize = arrayData->getElementByteSize();
        uint32_t eleNum = static_cast<uint32_t>(arrayData->getNumElements());
        StringRef raw = arrayData->getRawDataValues();
        outs() << "Global Variable: " << *GV << "\n"
               << "Array Length: " << eleSize << " * " << eleNum << " = "
               << raw.size() << "\n";

        // Work on an owned copy; the vector frees itself once the new
        // initializer has been built from it.
        std::vector<char> encrypted(raw.begin(), raw.end());
        uint64_t key = cryptoutils->get_uint64_t();
        // NOTE(review): the key bytes are taken in host byte order, which
        // lines up with the truncated key used by the decryptor on
        // little-endian hosts; confirm before building on a big-endian host.
        const char *keyBytes = reinterpret_cast<const char *>(&key);
        for (size_t i = 0; i < encrypted.size(); i++) {
          encrypted[i] ^= keyBytes[i % eleSize];
        }
        GV->setInitializer(ConstantDataArray::getRaw(
            StringRef(encrypted.data(), encrypted.size()), eleNum,
            arrayData->getElementType()));
        // The constructor writes to the global, so it can no longer be
        // marked constant.
        GV->setConstant(false);
        InsertArrayDecryption(M, {GV, key, eleNum});
        changed = true;
      } else if (auto *intData = dyn_cast<ConstantInt>(initializer)) {
        uint64_t key = cryptoutils->get_uint64_t();
        // XOR in APInt space so any integer width is handled without
        // implicit truncation or a 64-bit limit.
        const APInt &plain = intData->getValue();
        APInt cipher = plain ^ APInt(64, key).zextOrTrunc(plain.getBitWidth());
        GV->setInitializer(ConstantInt::get(M.getContext(), cipher));
        // Same as the array case: the decryptor stores into this global.
        GV->setConstant(false);
        InsertIntDecryption(M, {GV, key, 1});
        changed = true;
      }
    }
  }

  outs() << "Pass end...\n";

  // Globals and llvm.global_ctors were rewritten, so nothing can be assumed
  // preserved once any encryption took place.
  return changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

} // namespace

// Plugin entry point: registers the pass under the pipeline name "gvobfus".
//
// The PassPluginLibraryInfo fields are given positionally (API version,
// plugin name, plugin version, registration callback) because designated
// initializers are not part of C++ before C++20.
extern "C" PassPluginLibraryInfo llvmGetPassPluginInfo() {
  return {LLVM_PLUGIN_API_VERSION, "GVObfuscator", LLVM_VERSION_STRING,
          [](PassBuilder &PB) {
            PB.registerPipelineParsingCallback(
                [](StringRef Name, ModulePassManager &MPM,
                   ArrayRef<PassBuilder::PipelineElement>) -> bool {
                  if (Name != "gvobfus") {
                    return false;
                  }
                  MPM.addPass(GVObfuscator());
                  return true;
                });
          }};
}

写个代码测试一下。

#include <stdio.h>

/* Integer global with a constant initializer, present as input for the pass. */
int a = 10;

/* Prints a fixed banner line with puts(), then prints the string s. */
void func(const char *s) {
puts("!!!The testing string!!!");
puts(s);
}

/* Test driver: prints a greeting, reads one character from stdin and prints
 * one of two messages depending on whether it is '6', then calls func(). */
int main() {
    puts("This is a testing string!");

    /* getchar() returns int so that EOF stays distinct from every byte. */
    int ch = getchar();
    if (ch == '6') {
        printf("6666%c\n", ch);
    } else {
        printf("WTF?!\n");
    }

    func("!!!The testing string!!!");
    return 0;
}

混淆前后的IR对比。

; 混淆前
@a = dso_local global i32 10, align 4
@.str = private unnamed_addr constant [25 x i8] c"!!!The testing string!!!\00", align 1
@.str.1 = private unnamed_addr constant [26 x i8] c"This is a testing string!\00", align 1
@.str.2 = private unnamed_addr constant [8 x i8] c"6666%c\0A\00", align 1
@.str.3 = private unnamed_addr constant [7 x i8] c"WTF?!\0A\00", align 1

; 混淆后
@a = dso_local global i32 -660274230, align 4
@.str = private unnamed_addr global [25 x i8] c"\C5\C5\C5\B0\8C\81\C4\90\81\97\90\8D\8A\83\C4\97\90\96\8D\8A\83\C5\C5\C5\E4", align 1
@.str.1 = private unnamed_addr global [26 x i8] c"\1B'&<o&<o.o;*<;&!(o<;=&!(nO", align 1
@.str.2 = private unnamed_addr global [8 x i8] c"\F6\F6\F6\F6\E5\A3\CA\C0", align 1
@.str.3 = private unnamed_addr global [7 x i8] c"ji{\02\1C7=", align 1
@llvm.global_ctors = appending global [5 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @cc2b6b071cb0cb47a4171a4b1d76a06963d6f5e6, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @e176df9cb36840d9378338da84362465dd29b20a, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"2ec1d2c5cdff4d08047220c5c1ee639ae45deb5a", i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @d7db60557e37f256d7c62e73e03a42051365a247, i8* null }, { i32, void ()*, i8* } { i32 0, void ()* @"50c74527ef4457f6934c3f7d6291948f2f509e58", i8* null }]
LLVM字符串加密学习笔记

当然这对动调来说是没啥用的。

LLVM字符串加密学习笔记

对静态分析来说,这个强度的加密还是有点弱,加密函数很容易就会被看出来,还可以加点另外的平坦化之类的混淆,对分析加密函数也提高难度。



LLVM字符串加密学习笔记


看雪ID:L@in

https://bbs.kanxue.com/user-home-907632.htm

*本文为看雪论坛优秀文章,由 L@in 原创,转载请注明来自看雪社区

LLVM字符串加密学习笔记

# 往期推荐

1、在 Windows下搭建LLVM 使用环境

2、深入学习smali语法

3、安卓加固脱壳分享

4、Flutter 逆向初探

5、一个简单实践理解栈空间转移

6、记一次某盾手游加固的脱壳与修复


LLVM字符串加密学习笔记


LLVM字符串加密学习笔记

球分享

LLVM字符串加密学习笔记

球点赞

LLVM字符串加密学习笔记

球在看

原文始发于微信公众号(看雪学苑):LLVM字符串加密学习笔记

免责声明:文章中涉及的程序(方法)可能带有攻击性,仅供安全研究与教学之用,读者将其信息做其他用途,由读者承担全部法律及连带责任,本站不承担任何法律及连带责任;如有问题可邮件联系(建议使用企业邮箱或有效邮箱,避免邮件被拦截,联系方式见首页),望知悉。
  • 左青龙
  • 微信扫一扫
  • weinxin
  • 右白虎
  • 微信扫一扫
  • weinxin
admin
  • 本文由 发表于 2023年7月27日10:05:14
  • 转载请保留本文链接(CN-SEC中文网:感谢原作者辛苦付出):
                   LLVM字符串加密学习笔记https://cn-sec.com/archives/1911632.html
                  免责声明:文章中涉及的程序(方法)可能带有攻击性,仅供安全研究与教学之用,读者将其信息做其他用途,由读者承担全部法律及连带责任,本站不承担任何法律及连带责任;如有问题可邮件联系(建议使用企业邮箱或有效邮箱,避免邮件被拦截,联系方式见首页),望知悉.

发表评论

匿名网友 填写信息