1
简介
本文主要是分析CVE-2024-3446漏洞的成因和漏洞的补丁,以及之前的补丁为何失效,顺便对Qemu历史重入漏洞进行了分析梳理
2
时间线
-
2020/07/21 e1000e重入导致的UAF(还没归为重入问题)
-
2021/08/23 重入漏洞整理为一类问题
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com/
-
2023/04/28 给出第一个通用修复方案
https://gitlab.com/qemu-project/qemu/-/commit/c40ca2301c7603524eaddb5308a3f524c6f89d24
-
2024/04/09 修复后再次爆出的重入漏洞(CVE-2024-3446)
https://bugzilla.redhat.com/show_bug.cgi?id=2274211
-
2024/04/10 修复CVE-2024-3446
https://gitlab.com/qemu-project/qemu/-/commit/f243175727903a0d2b52422e7baef86c1838a895
3
漏洞成因
通常是设备交互函数在执行过程中进行了DMA操作,即调用了cpu_physical_memory_write、address_space_write、pci_dma_write等函数;当DMA的目标地址是设备(MMIO)地址时,会再次进入设备的交互函数,从而引发一些安全问题
设备交互
众所周知操作设备有以下3种方式:
-
BH
-
Timer
-
PMIO/MMIO
接下来我们分别了解一下这三种方式的操作及原理
BH
BH 被注册到全局的qemu_aio_context结构中,在事件轮询处理时做响应。BH 维护一个链表,新的操作会插入到尾部,在主线程的qemu_main_loop中依次处理,所以仅仅使用BH不会导致重入
/*
 * Bottom-half (BH) descriptor as quoted by this write-up.
 * The per-field notes below are the article author's annotations.
 */
struct QEMUBH {
AioContext *ctx; // context the bottom half belongs to (qemu_aio_context)
const char *name;
QEMUBHFunc *cb; // function the bottom half executes
void *opaque; // argument passed to that function
QSLIST_ENTRY(QEMUBH) next; // next bottom half to execute
unsigned flags; // set to BH_PENDING | BH_SCHEDULED after qemu_bh_schedule is called; becomes 0 once consumed
MemReentrancyGuard *reentrancy_guard; // newly added re-entrancy protection
};
qemu_bh_schedule触发后插入到BH事件处理链表
static void bh_test_cb(void *opaque)
{
bhtestState *obj = opaque;
print_timestamp_ms();
qemu_bh_schedule(obj->bh);
}
/*
 * Realize hook of the BH demo device: creates the bottom half with
 * bh_test_cb as its callback and schedules it right away.
 *
 * pdev: PCI device being realized (embedded in bhtestState).
 * errp: error out-parameter; not written by this function.
 */
static void pci_bhtest_realize(PCIDevice *pdev, Error **errp)
{
    bhtestState *state = DO_UPCAST(bhtestState, pdev, pdev);

    /* Create the bottom half; the device state is the callback argument. */
    state->bh = qemu_bh_new(bh_test_cb, state);

    /* Activate it. */
    qemu_bh_schedule(state->bh);
}
Timer
Timer 对象会被注册到main_loop_tlg结构中,初始化时会设置timer_list指向main_loop_tlg中对应类型的链表,Timer 处理同样在主线程的qemu_main_loop函数中,所以仅仅使用Timer也不会导致重入
/*
 * Demo timer callback: prints a marker line each time the timer fires.
 *
 * opaque: the pointer registered with timer_new_ns() (the device state);
 *         this demo does not need it.
 */
static void timer_cb(void *opaque)
{
    (void)opaque;
    /* The newline escape had been lost ("in timer_cbn"); restored here. */
    printf("in timer_cb\n");
}
/*
 * Realize hook of the timer demo device: only creates the timer. Nothing in
 * this function arms it.
 *
 * pdev: PCI device being realized (embedded in mmiotestState).
 * errp: error out-parameter; not written by this function.
 */
static void pci_mmiotest_realize(PCIDevice *pdev, Error **errp)
{
    mmiotestState *state = DO_UPCAST(mmiotestState, pdev, pdev);

    /* Create the timer on QEMU_CLOCK_VIRTUAL; timer_cb receives the state. */
    state->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, timer_cb, state);
}
激活时会将timer对象放到main_loop_tlg的链表中,并设置到期时间
timer_mod(obj->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
PMIO/MMIO
IO内存注册,当对内存读写时会执行注册的MemoryRegionOps操作,当IO函数中再次执行了DMA操作时,会再次进入对应的操作函数,就会导致重入
/*
 * MMIO callbacks of the demo device. Both the .valid and the .impl access
 * sizes are limited to the range 1..8.
 */
static const MemoryRegionOps mmiotest_mmio_ops = {
    .read                  = mmiotest_mmio_read,
    .write                 = mmiotest_mmio_write,
    .endianness            = DEVICE_NATIVE_ENDIAN,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .impl.min_access_size  = 1,
    .impl.max_access_size  = 8,
};
/*
 * Realize hook of the MMIO demo device: initializes a 0x100-byte I/O memory
 * region served by mmiotest_mmio_ops and registers it as BAR 0.
 *
 * pdev: PCI device being realized (embedded in mmiotestState).
 * errp: error out-parameter; not written by this function.
 */
static void pci_mmiotest_realize(PCIDevice *pdev, Error **errp)
{
    mmiotestState *state = DO_UPCAST(mmiotestState, pdev, pdev);

    /* The device object is both the region owner and the callback opaque. */
    memory_region_init_io(&state->mmio, OBJECT(state), &mmiotest_mmio_ops,
                          state, "mmiotest-mmio", 0x100);

    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &state->mmio);
}
mmio重入调用栈样例
DMA操作进入了mr_mmio_write,然后在函数中调用cpu_physical_memory_write再次进入mr_mmio_write导致了重入
#0 0x0000555555a3d6c7 in mr_mmio_write (opaque=0x555557779930, addr=3840, val=0, size=8) at ../hw/pci/mr.c:132
#1 0x0000555555bf58c3 in memory_region_write_accessor (mr=0x55555777a2a0, addr=3840, value=0x7ffff6acfdf8, size=8, shift=0, mask=18446744073709551615, attrs=...) at ../softmmu/memory.c:492
#2 0x0000555555bf5b11 in access_with_adjusted_size (addr=3840, value=0x7ffff6acfdf8, size=8, access_size_min=1, access_size_max=8, access_fn=0x555555bf57c9 <memory_region_write_accessor>, mr=0x55555777a2a0, attrs=...) at ../softmmu/memory.c:554
#3 0x0000555555bf8c0f in memory_region_dispatch_write (mr=0x55555777a2a0, addr=3840, data=0, op=MO_64, attrs=...) at ../softmmu/memory.c:1504
#4 0x0000555555beb934 in flatview_write_continue (fv=0x7fff302b57c0, addr=4273938176, attrs=..., ptr=0x55555777a500, len=8, addr1=3840, l=8, mr=0x55555777a2a0) at ../softmmu/physmem.c:2782
#5 0x0000555555beba7d in flatview_write (fv=0x7fff302b57c0, addr=4273938176, attrs=..., buf=0x55555777a500, len=8) at ../softmmu/physmem.c:2822
#6 0x0000555555bebdf7 in address_space_write (as=0x555556768ec0 <address_space_memory>, addr=4273938176, attrs=..., buf=0x55555777a500, len=8) at ../softmmu/physmem.c:2914
#7 0x0000555555bebe68 in address_space_rw (as=0x555556768ec0 <address_space_memory>, addr=4273938176, attrs=..., buf=0x55555777a500, len=8, is_write=true) at ../softmmu/physmem.c:2924
#8 0x0000555555bebed6 in cpu_physical_memory_rw (addr=4273938176, buf=0x55555777a500, len=8, is_write=true) at ../softmmu/physmem.c:2933
#9 0x0000555555a3d167 in cpu_physical_memory_write (addr=4273938176, buf=0x55555777a500, len=8) at /home/test/Desktop/qemu-6.2.0/include/exec/cpu-common.h:82
#10 0x0000555555a3d5c2 in mr_mmio_write (opaque=0x555557779930, addr=3840, val=313778176, size=8) at ../hw/pci/mr.c:117
#11 0x0000555555bf58c3 in memory_region_write_accessor (mr=0x55555777a2a0, addr=3840, value=0x7ffff6ad0148, size=8, shift=0, mask=18446744073709551615, attrs=...) at ../softmmu/memory.c:492
#12 0x0000555555bf5b11 in access_with_adjusted_size (addr=3840, value=0x7ffff6ad0148, size=8, access_size_min=1, access_size_max=8, access_fn=0x555555bf57c9 <memory_region_write_accessor>, mr=0x55555777a2a0, attrs=...) at ../softmmu/memory.c:554
#13 0x0000555555bf8c0f in memory_region_dispatch_write (mr=0x55555777a2a0, addr=3840, data=313778176, op=MO_64, attrs=...) at ../softmmu/memory.c:1504
#14 0x0000555555d30f75 in io_writex (env=0x555556a49410, iotlbentry=0x7fff307ba9a0, mmu_idx=1, val=313778176, addr=140444038311680, retaddr=140735287403492, op=MO_64) at ../accel/tcg/cputlb.c:1420
#15 0x0000555555d33592 in store_helper (env=0x555556a49410, addr=140444038311680, val=313778176, oi=49, retaddr=140735287403492, op=MO_64) at ../accel/tcg/cputlb.c:2355
#16 0x0000555555d33a2e in helper_le_stq_mmu (env=0x555556a49410, addr=140444038311680, val=313778176, oi=49, retaddr=140735287403492) at ../accel/tcg/cputlb.c:2469
#17 0x00007fff7cd023e4 in code_gen_buffer ()
#18 0x0000555555d1e243 in cpu_tb_exec (cpu=0x555556a40b60, itb=0x7fffba817180, tb_exit=0x7ffff6ad0864) at ../accel/tcg/cpu-exec.c:357
#19 0x0000555555d1f108 in cpu_loop_exec_tb (cpu=0x555556a40b60, tb=0x7fffba817180, last_tb=0x7ffff6ad0870, tb_exit=0x7ffff6ad0864) at ../accel/tcg/cpu-exec.c:842
#20 0x0000555555d1f4c6 in cpu_exec (cpu=0x555556a40b60) at ../accel/tcg/cpu-exec.c:1001
#21 0x0000555555d41bea in tcg_cpus_exec (cpu=0x555556a40b60) at ../accel/tcg/tcg-accel-ops.c:67
#22 0x0000555555d41f7a in mttcg_cpu_thread_fn (arg=0x555556a40b60) at ../accel/tcg/tcg-accel-ops-mttcg.c:95
#23 0x0000555555efd63b in qemu_thread_start (args=0x555556a60820) at ../util/qemu-thread-posix.c:556
#24 0x00007ffff7938609 in start_thread () from target:/lib/x86_64-linux-gnu/libpthread.so.0
#25 0x00007ffff785d353 in clone () from target:/lib/x86_64-linux-gnu/libc.so.6
场景
漏洞场景
-
mmio -> dma -> mmio
-
bh -> dma -> mmio
-
timer -> dma -> mmio
利用场景
-
单设备重复进入(A -> A -> ...)
-
多设备重复进入(A -> B -> A -> B -> ... )(是否存在存疑)
4
漏洞危害
stack overflow
因为不断地进入mmio的操作函数,形成无限递归,最终导致栈被撑爆、QEMU进程崩溃,造成拒绝服务(DoS)
UAF
在触发重入后,比较常见的场景是去执行reset函数,导致一些对象被释放,函数返回继续执行导致UAF
Fix
@a1xndr在2023/04/28提交了多个commit修复re-entrancy问题
分析其中3个重要的Fix
Fix01
这个补丁主要针对场景:mmio -> dma -> mmio (√)
-
在mr结构中增加了dev对象和disable_reentrancy_guard
-
在dev对象中增加mem_reentrancy_guard标志
-
在access_with_adjusted_size函数中对mem_reentrancy_guard标志做了判断
https://github.com/qemu/qemu/commit/a2e1753b8054344f32cf94f31c6399a58794a380#diff-164a41822a3316a554081cfd1c7202bb60723035af488f3fe5a16bff05cdc130L770
static MemTxResult access_with_adjusted_size(hwaddr addr,
uint64_t *value,
unsigned size,
unsigned access_size_min,
unsigned access_size_max,
MemTxResult (*access_fn)
*mr,
hwaddr addr,
uint64_t *value,
unsigned size,
signed shift,
uint64_t mask,
MemTxAttrs attrs),
MemoryRegion *mr,
MemTxAttrs attrs)
{
uint64_t access_mask;
unsigned access_size;
unsigned i;
MemTxResult r = MEMTX_OK;
bool reentrancy_guard_applied = false;
if (!access_size_min) {
access_size_min = 1;
}
if (!access_size_max) {
access_size_max = 4;
}
Do not allow more than one simultaneous access to a device's IO Regions */
if (mr->dev && !mr->disable_reentrancy_guard &&
!mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
re-entrant IO on MemoryRegion: "
at addr: 0x%" HWADDR_PRIX,
addr);
return MEMTX_ACCESS_ERROR;
}
true; =
reentrancy_guard_applied = true;
}
FIXME: support unaligned access? */
access_size = MAX(MIN(size, access_size_max), access_size_min);
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
if (memory_region_big_endian(mr)) {
for (i = 0; i < size; i += access_size) {
r |= access_fn(mr, addr + i, value, access_size,
- access_size - i) * 8, access_mask, attrs);
}
else {
for (i = 0; i < size; i += access_size) {
r |= access_fn(mr, addr + i, value, access_size, i * 8,
attrs);
}
}
if (mr->dev && reentrancy_guard_applied) {
false; =
}
return r;
}
Fix02
这个补丁主要针对场景:bh -> dma -> mmio (√)
-
在BH结构中增加了reentrancy_guard标志
-
在执行BH回调时会对reentrancy_guard进行置位,再进入mr操作则会被阻止
https://gitlab.com/qemu-project/qemu/-/commit/9c86c97f12c060bf7484dd931f38634e166a81f0
/*
 * Run one bottom half's callback. If the BH carries a re-entrancy guard, the
 * guard is marked as engaged in I/O while the callback runs and restored to
 * its previous value afterwards. A guard that is already engaged on entry is
 * only traced; the callback still runs.
 */
void aio_bh_call(QEMUBH *bh)
{
    /* Snapshot the guard pointer first: the callback may free bh. */
    MemReentrancyGuard *guard = bh->reentrancy_guard;
    bool was_engaged = false;

    if (guard) {
        was_engaged = guard->engaged_in_io;
        if (was_engaged) {
            trace_reentrant_aio(bh->ctx, bh->name);
        }
        guard->engaged_in_io = true;
    }

    bh->cb(bh->opaque);

    /* Use the saved pointer, never bh->reentrancy_guard, after the callback. */
    if (guard) {
        guard->engaged_in_io = was_engaged;
    }
}
Fix03
通过替换以下函数,对设备开启防护:
-
qemu_bh_new-> qemu_bh_new_guarded
-
aio_bh_new-> aio_bh_new_guarded
https://gitlab.com/qemu-project/qemu/-/commit/f63192b0544af5d3e4d5edfd85ab520fcf671377
被修改的设备列表:
1. xen9pfs
2. virtio-blk
3. xen-block
4. virtio-serial-bus
5. qxl
6. virtio-gpu
7. ahci
8. imx_rngc
9. mac_dbdma
10. virtio-net
11. nvme
12. scsi/*
13. usb/*
14. virtio-balloon
15. virtio-crypto
新增的guard类函数,主要是添加了reentrancy_guard参数
/*
 * Guarded variant of BH creation: forwards to aio_bh_new_full(), using the
 * stringified callback expression as the BH name and passing `guard` as the
 * re-entrancy guard.
 */
#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
    aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)
/*
 * Create a bottom half in the global qemu_aio_context.
 *
 * cb / opaque:      callback and its argument.
 * name:             name stored in the BH.
 * reentrancy_guard: guard stored in the BH (passed through unchanged).
 *
 * Returns whatever aio_bh_new_full() returns.
 */
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
                         MemReentrancyGuard *reentrancy_guard)
{
    QEMUBH *created = aio_bh_new_full(qemu_aio_context, cb, opaque, name,
                                      reentrancy_guard);

    return created;
}
/*
 * Allocate a bottom half and fill in its context, callback, callback
 * argument, name and re-entrancy guard. All remaining fields start zeroed.
 *
 * Returns the newly allocated QEMUBH.
 */
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
                        const char *name, MemReentrancyGuard *reentrancy_guard)
{
    QEMUBH *new_bh = g_new(QEMUBH, 1);

    /* Zero everything first so unspecified fields match a compound literal. */
    *new_bh = (QEMUBH){ 0 };
    new_bh->ctx = ctx;
    new_bh->cb = cb;
    new_bh->opaque = opaque;
    new_bh->name = name;
    new_bh->reentrancy_guard = reentrancy_guard;

    return new_bh;
}
/*
 * Create a bottom half whose re-entrancy guard belongs to the device that
 * owns dev's parent bus, rather than to dev itself.
 *
 * dev:         device whose parent bus is looked up.
 * cb / opaque: BH callback and its argument.
 * name:        name stored in the BH.
 */
QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
                                   QEMUBHFunc *cb, void *opaque,
                                   const char *name)
{
    DeviceState *bus_owner = qdev_get_parent_bus(dev)->parent;

    return qemu_bh_new_full(cb, opaque, name,
                            &bus_owner->mem_reentrancy_guard);
}
这里有一个问题,补丁只修复了两种情况,第三种情况是没有修复的
mmio -> dma -> mmio (√)
bh -> dma -> mmio (√)
timer -> dma -> mmio (x)
5
CVE-2024-3446
漏洞描述
在 QEMU virtio 设备(virtio-gpu、virtio-serial-bus、virtio-crypto)中发现了双重释放漏洞,其中 mem_reentrancy_guard 标志不足以防止 DMA 重入问题。此问题可能允许恶意特权来宾用户破坏主机上的 QEMU 进程,从而导致拒绝服务或允许在主机上的 QEMU 进程上下文中执行任意代码。
之前的补丁为何无效?
官方重新修复了以下三个设备的代码(其它设备为什么不受影响?)
-
virtio-gpu
-
virtio-crypto
-
virtio-serial-bus
https://gitlab.com/qemu-project/qemu/-/commits/master?search=CVE-2024-3446
Fix分析
以virtio-gpu为例,补丁将原来使用的qemu_bh_new_guarded函数替换为virtio_bh_new_guarded_full函数
virtio_bh_new_guarded_full函数将判断重入时使用的设备从dev变成了dev->parent_bus->parent,parent指向的是谁?
这就涉及virtio设备的注册过程,如果是普通的pci设备它的结构就是pci-bus -> pci-device,而virtio设备会注册一个代理设备virtio-xx-pci,以virtio-gpu为例,它的结构是pci-bus -> virtio-gpu-pci -> virtio-bus -> virtio-gpu,所以parent指向的是virtio-gpu-pci设备,为什么要使用它呢?
/*
 * Create a bottom half guarded by the re-entrancy guard of the device that
 * owns dev's parent bus. Per the article, for a virtio device that owner is
 * the virtio-xx-pci proxy.
 */
QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
                                   QEMUBHFunc *cb, void *opaque,
                                   const char *name)
{
    /* dev->parent_bus->parent (virtio-xx-pci) */
    DeviceState *proxy_dev = qdev_get_parent_bus(dev)->parent;

    return qemu_bh_new_full(cb, opaque, name,
                            &proxy_dev->mem_reentrancy_guard);
}
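跟一下virtio设备mmio的初始化流程:virtio设备调用realize函数初始化时会执行到virtio_bus_device_plugged,进而调用到virtio_pci_device_plugged,并在其中(virtio_pci_modern_regions_init)注册mmio操作。注册io操作时指定的owner(就是proxy对象,virtio-xx-pci)会被保存到mr->dev中,所以mr->dev指向的是virtio-xx-pci;而之前的补丁把BH的reentrancy_guard设置在了virtio-xx设备自身上,两者不是同一个对象,mmio访问检查的标志与BH置位的标志对不上,这就是之前补丁无效的原因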
/*
 * Abridged excerpt: only the statements relevant to MMIO region setup are
 * shown; elided code is marked with comments. `cap` and `notify` are declared
 * in the elided part.
 *
 * The modern virtio-pci regions are initialized and mapped on the proxy
 * object (VirtIOPCIProxy), not on the virtio device behind the bus.
 */
static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtioBusState *bus = &proxy->bus;
    bool legacy = virtio_pci_legacy(proxy);
    bool modern;
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    uint8_t *config;
    uint32_t size;
    VirtIODevice *vdev = virtio_bus_get_device(bus);

    /* ... elided ... */

    virtio_pci_modern_regions_init(proxy, vdev->name);
    virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
    virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
    virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
    virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);

    /* ... elided ... */
}
/*
 * Abridged excerpt: the elided part declares the MemoryRegionOps tables and
 * the `name` string buffer.
 *
 * Initializes the five modern virtio-pci I/O regions (common, isr, device,
 * notify, notify-pio). Each region is created with OBJECT(proxy) as its owner
 * and `proxy` as the callback opaque, and is named
 * "virtio-pci-<kind>-<vdev_name>".
 */
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
                                           const char *vdev_name)
{
    /* ... elided ... */

    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          name->str,
                          proxy->common.size);

    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
                          &isr_ops,
                          proxy,
                          name->str,
                          proxy->isr.size);

    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
                          &device_ops,
                          proxy,
                          name->str,
                          proxy->device.size);

    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
                          &notify_ops,
                          proxy,
                          name->str,
                          proxy->notify.size);

    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
                          &notify_pio_ops,
                          proxy,
                          name->str,
                          proxy->notify_pio.size);
}
6
如何发现此类漏洞
fuzz
之前外界公开过一个fuzz工具的实现,它对Qemu代码进行了定制,通过hook dma相关操作,喂给dma操作特定的数据,这样就能捕获到dma操作导致的异常
codeql
《Hunting and Exploiting Recursive MMIO Flaws in QEMU/KVM》中提供的Codeql代码,主要功能就是路径搜索,源为内存io操作的注册函数,目标为dma write的功能函数
/**
* @kind path-problem
*/
/**
 * A function that is treated as an MMIO handler: there is a global variable
 * whose file path matches `.*qemu-6.1.0/hw/.*`, whose type name matches
 * `.*MemoryRegionOps.*`, whose own name matches `.*mmio.*`, and whose
 * initializer's child at index 1 has the same string form as this function.
 *
 * NOTE(review): the match is done by comparing `toString()` text, and index 1
 * presumably selects the `.write` entry of the ops table; confirm against the
 * target code base. The path pattern is hard-coded to a qemu-6.1.0 checkout.
 */
class MMIOFn extends Function {
MMIOFn() {
exists(GlobalVariable gv |
gv.getFile().getAbsolutePath().regexpMatch(".*qemu-6.1.0/hw/.*") and
gv.getType().getName().regexpMatch(".*MemoryRegionOps.*") and
gv.getName().regexpMatch(".*mmio.*") and
gv.getInitializer().getExpr().getChild(1).toString() = this.toString()
)
}
}
/**
 * A function that appears (by string form) as child 0 or child 1 of a call
 * whose target name matches `qemu_bh_new_full|timer_new_ns`, where the call
 * is located in a file matching `.*qemu-6.1.0/hw/.*`.
 *
 * This class is not referenced by the query clause shown in this excerpt.
 */
class BHTFn extends Function {
BHTFn() {
exists(FunctionCall fc |
fc.getTarget().getName().regexpMatch("qemu_bh_new_full|timer_new_ns") and
fc.getFile().getAbsolutePath().regexpMatch(".*qemu-6.1.0/hw/.*") and
(fc.getChild(0).toString() = this.toString() or fc.getChild(1).toString() = this.toString())
)
}
}
/**
 * A function that directly contains a call whose target name matches
 * `g_free`, excluding functions whose name matches `.*shutdown.*` and
 * functions in files whose relative path matches one of the excluded
 * patterns (error, test, replay, translate-all, xen, qapi-visit).
 *
 * This class is not referenced by the query clause shown in this excerpt.
 */
class FreeFn extends Function {
FreeFn() {
exists(FunctionCall fc |
fc.getTarget().getName().matches("g_free") and
fc.getEnclosingFunction() = this and
not this.getName().regexpMatch(".*shutdown.*") and
not this.getFile()
.getRelativePath()
.regexpMatch(".*error.*|.*test.*|.*replay.*|.*translate-all.*|.*xen.*|.*qapi-visit.*")
)
}
}
/**
 * A function whose name is one of the DMA-write helpers that can re-enter a
 * device's MMIO handlers.
 *
 * The article abbreviates this list; extend the alternation with further
 * helpers as needed. The article's literal `...` placeholder is left out
 * because, as a regex alternative, it would match every three-character
 * function name.
 */
class ReentryFn extends Function {
  ReentryFn() {
    this.getName()
        .regexpMatch("address_space_write|dma_memory_write|stb_dma|stl_be_dma|stl_le_dma|stq_be_dma|stq_le_dma|stw_be_dma|stw_le_dma|pci_dma_write|dma_buf_read")
  }
}
/** Call-graph edge relation: holds when function `a` calls function `b`. */
query predicate edges(Function a, Function b) {
a.calls(b)
}
// Report every (MMIO handler, DMA-write helper) pair where the helper is
// reachable from the handler through one or more `edges` steps.
from MMIOFn entry_fn, ReentryFn end_fn
where edges+(entry_fn, end_fn)
select end_fn, entry_fn, end_fn, "MMIO -> Reentry: from " + entry_fn.getName() + " to " +
end_fn.getName()
扩展分析
对比上面的修复列表还有3个virtio设备没有更新补丁,是否还存在漏洞
-
virtio-blk
v8.2.0 移除了BH功能函数 (https://github.com/qemu/qemu/commit/073458da5619c8914a91440ef243d1e2b888c1b7)
-
virtio-net
无
-
virtio-balloon
无
是否存在: timer -> dma -> mmio攻击场景
用codeql搜索未找到此类攻击路径,但应该存在这种攻击风险
7
参考
https://i.blackhat.com/Asia-22/Thursday-Materials/AS-22-Qiuhao-Recursive-MMIO-final.pdf
https://conference.hitb.org/hitbsecconf2023ams/materials/D1T1 - Leveraging Advanced Techniques of DMA Reentrancy to Escape QEMU - Quan Jin & Ao Wang.pdf
原文始发于微信公众号(华为安全应急响应中心):Qemu重入漏洞梳理 & CVE-2024-3446分析
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论