近日,锦行安全团队监测到 Linux Kernel 存在本地权限提升漏洞,该漏洞为Linux Kernel的io_uring 子系统中存在释放后重用漏洞,拥有低权限的本地攻击者可以利用该漏洞将权限提升到ROOT权限。影响版本:Linux Kernel < 6.1-rc1,锦行安全研究员对该漏洞原理进行分析如下:
1.Commit Information
io_uring/af_unix: defer registered files gc to io_uring release
Instead of putting io_uring's registered files in unix_gc() we want it
to be done by io_uring itself. The trick here is to consider io_uring
registered files for cycle detection but not actually putting them down.
Because io_uring can't register other ring instances, this will remove
all refs to the ring file triggering the ->release path and clean up
with io_ring_ctx_free().
Cc: stable@vger.kernel.org
Fixes: 6b06314 ("io_uring: add file set registration")
Reported-and-tested-by: David Bouman <[email protected]>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
[axboe: add kerneldoc comment to skb, fold in skb leak fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9fcf534f2d927..7be5bb4c94b6d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -803,6 +803,7 @@ typedef unsigned char *sk_buff_data_t;
* @csum_level: indicates the number of consecutive checksums found in
* the packet minus one that have been verified as
* CHECKSUM_UNNECESSARY (max 3)
+ * @scm_io_uring: SKB holds io_uring registered files
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @slow_gro: state present at GRO time, slower prepare step required
@@ -982,6 +983,7 @@ struct sk_buff {
#endif
__u8 slow_gro:1;
__u8 csum_not_inet:1;
+ __u8 scm_io_uring:1;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 6f88ded0e7e56..012fdb04ec238 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
UNIXCB(skb).fp = fpl;
skb->sk = sk;
+ skb->scm_io_uring = 1;
skb->destructor = unix_destruct_scm;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d45d5366115a7..dc27635403932 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -204,6 +204,7 @@ void wait_for_unix_gc(void)
/* The external entry point: unix_gc() */
void unix_gc(void)
{
+ struct sk_buff *next_skb, *skb;
struct unix_sock *u;
struct unix_sock *next;
struct sk_buff_head hitlist;
@@ -297,11 +298,30 @@ void unix_gc(void)
spin_unlock(&unix_gc_lock);
+ /* We need io_uring to clean its registered files, ignore all io_uring
+ * originated skbs. It's fine as io_uring doesn't keep references to
+ * other io_uring instances and so killing all other files in the cycle
+ * will put all io_uring references forcing it to go through normal
+ * release.path eventually putting registered files.
+ */
+ skb_queue_walk_safe(&hitlist, skb, next_skb) {
+ if (skb->scm_io_uring) {
+ __skb_unlink(skb, &hitlist);
+ skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+ }
+ }
+
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
spin_lock(&unix_gc_lock);
+ /* There could be io_uring registered files, just push them back to
+ * the inflight list
+ */
+ list_for_each_entry_safe(u, next, &gc_candidates, link)
+ list_move_tail(&u->link, &gc_inflight_list);
+
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
2. Patch analysis
-
在unix_gc函数执行的过程中略过所有io_uring相关的skb数据结构的释放。
3. RootCause analysis
-
当内核启用了 CONFIG_UNIX 选项时,每创建一个 IO_URING 对象,都会同时创建一个与其对应的 unix_socket。
/*
* Allocate an anonymous fd, this is what constitutes the application
* visible backing of an io_uring instance. The application mmaps this
* fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
* we have to tie this fd to a socket for file garbage collection purposes.
*/
static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
{
struct file *file;
#if defined(CONFIG_UNIX)
int ret;
/*
 * Create the kernel-side PF_UNIX socket and store it in ctx->ring_sock.
 * If creation fails, its error code is returned as an ERR_PTR and no
 * file is created.
 */
ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
&ctx->ring_sock);
if (ret)
return ERR_PTR(ret);
#endif
/* Create the anonymous "[io_uring]" file with ctx as its private data. */
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
#if defined(CONFIG_UNIX)
if (IS_ERR(file)) {
/* File creation failed: release the socket created above. */
sock_release(ctx->ring_sock);
ctx->ring_sock = NULL;
} else {
/* Tie the socket to the ring file. */
ctx->ring_sock->file = file;
}
#endif
/* May be an ERR_PTR value; callers must check with IS_ERR(). */
return file;
}
-
IO_URING 将使用 unix_socket 的 sk_receive_queue 来管理在子系统中注册的所有文件的生命周期。这是对 sk_receive_queue 的一种变通用法,我个人认为这种用法并不规范。
/*
* Ensure the UNIX gc is aware of our file set, so we are certain that
* the io_uring can be safely unregistered on process exit, even if we have
* loops in the file referencing.
*/
static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
{
struct sock *sk = ctx->ring_sock->sk;
struct scm_fp_list *fpl;
struct sk_buff *skb;
int i, nr_files;
/* Allocate the fd list and a zero-length skb that will carry it. */
fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
skb = alloc_skb(0, GFP_KERNEL);
if (!skb) {
kfree(fpl);
return -ENOMEM;
}
skb->sk = sk;
nr_files = 0;
fpl->user = get_uid(current_user());
/*
 * Walk table slots [offset, offset + nr). Empty slots are skipped, so
 * nr_files counts only the files actually placed in fpl->fp[].
 */
for (i = 0; i < nr; i++) {
struct file *file = io_file_from_index(ctx, i + offset);
if (!file)
continue;
/* Take a file reference for the list and account it as in flight. */
fpl->fp[nr_files] = get_file(file);
unix_inflight(fpl->user, fpl->fp[nr_files]);
nr_files++;
}
if (nr_files) {
fpl->max = SCM_MAX_FD;
fpl->count = nr_files;
/* Attach the list to the skb and queue it on the ring socket. */
UNIXCB(skb).fp = fpl;
skb->destructor = unix_destruct_scm;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
skb_queue_head(&sk->sk_receive_queue, skb);
/* Drop one reference on each listed file now that the skb is queued. */
for (i = 0; i < nr_files; i++)
fput(fpl->fp[i]);
} else {
/*
 * Nothing to track: discard the skb and the list.
 * NOTE(review): the reference taken by get_uid() above is not visibly
 * released on this path - confirm whether that is intended.
 */
kfree_skb(skb);
kfree(fpl);
}
return 0;
}
-
在正常情况下,socket 要使用一个 skb 时,首先需要将该 skb 从 sk_receive_queue 链表中取出,而这个取出操作是受锁保护的。
/**
* skb_dequeue - remove from the head of the queue
* @list: list to dequeue from
*
* Remove the head of the list. The list lock is taken so the function
* may be used safely with other locking list functions. The head item is
* returned or %NULL if the list is empty.
*/
struct sk_buff *skb_dequeue(struct sk_buff_head *list)
{
unsigned long flags;
struct sk_buff *result;
/*
 * The unlocked helper __skb_dequeue() runs only while list->lock is
 * held; the saved interrupt state is kept in flags and restored on unlock.
 */
spin_lock_irqsave(&list->lock, flags);
result = __skb_dequeue(list);
spin_unlock_irqrestore(&list->lock, flags);
return result;
}
EXPORT_SYMBOL(skb_dequeue);
-
但是值得注意的是,IO_URING 子系统在处理 I/O 请求时使用 sk_receive_queue 所管理的文件对象,并没有锁保护。不过导致漏洞的根本原因并不在此:如果 IO_URING 子系统在对所管理的文件对象进行索引之前先增加对象的引用计数,那么在没有锁保护的情况下使用 sk_receive_queue 中的数据是没有问题的。
-
然而 IO_URING 子系统有一个名为 IOSQE_FIXED_FILE 的特性。当设置了 IOSQE_FIXED_FILE 标志时,IO_URING 子系统不会在索引文件对象之前增加其引用计数。正是这个特性最终导致了漏洞的产生。
/*
 * Look up a registered ("fixed") file by its index in the ring's file table.
 *
 * Returns NULL when fd is outside [0, ctx->nr_user_files); otherwise returns
 * the file pointer stored in that slot.
 *
 * NOTE(review): no fget()/get_file() call is made here, so this function
 * itself takes no reference on the returned file. Its lifetime presumably
 * depends on what io_req_set_rsrc_node() pins - confirm.
 */
static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
struct io_kiocb *req, int fd)
{
struct file *file;
unsigned long file_ptr;
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
return NULL;
/* Sanitize the already range-checked index before using it. */
fd = array_index_nospec(fd, ctx->nr_user_files);
/* The slot value packs the file pointer together with flag bits. */
file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
/* FFS_MASK selects the pointer part; the remaining bits are flags. */
file = (struct file *) (file_ptr & FFS_MASK);
file_ptr &= ~FFS_MASK;
/* mask in overlapping REQ_F and FFS bits */
req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT);
io_req_set_rsrc_node(req);
return file;
}
/*
 * Resolve an ordinary (non-fixed) file descriptor for a request via fget().
 *
 * Returns the result of fget(fd) unchanged; the NULL check below shows the
 * result may be NULL. If the file is itself an io_uring file, the request is
 * additionally passed to io_req_track_inflight().
 */
static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
struct io_kiocb *req, int fd)
{
struct file *file = fget(fd);
trace_io_uring_file_get(ctx, fd);
/* we don't allow fixed io_uring files */
if (file && unlikely(file->f_op == &io_uring_fops))
io_req_track_inflight(req);
return file;
}
/*
 * Resolve the file for a request: when 'fixed' is true, fd is looked up via
 * io_file_get_fixed(); otherwise it goes through io_file_get_normal().
 * The chosen helper's return value is returned unchanged.
 */
static inline struct file *io_file_get(struct io_ring_ctx *ctx,
struct io_kiocb *req, int fd, bool fixed)
{
if (fixed)
return io_file_get_fixed(ctx, req, fd);
else
return io_file_get_normal(ctx, req, fd);
}
-
最终漏洞触发的流程大致如下:
-
因为创建一个 IO_URING 对象时会同时创建一个与其对应的 unix_socket,所以当 unix_gc 被触发时,如果 IO_URING 对象对应的 unix_socket 符合被 gc 释放的条件,gc 函数会尝试释放该 unix_socket 的 sk_receive_queue 中所有需要释放的 skb。
-
如果一个 IO_URING 对象正在处理 I/O 请求时,unix_gc 被触发并尝试释放 sk_receive_queue 中的 skb,且该 I/O 请求使用了 IOSQE_FIXED_FILE 特性,那么由于这个文件引用既没有被 sk_receive_queue 的锁保护,也没有在使用前增加引用计数,最终会导致 I/O 请求正在引用的文件被 unix_gc 释放。
-
结合一些其他的利用技巧,可以通过该漏洞完成对特权文件的任意写,从而实现本地提权。
4. Variant analysis
1.https://googleprojectzero.blogspot.com/2022/08/the-quantum-state-of-linux-kernel.html
2.https://googleprojectzero.blogspot.com/2022/03/racing-against-clock-hitting-tiny.html
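5. 修复建议
将内核升级到已包含上述补丁的版本(上游为 6.1-rc1 及之后的版本,或发行版提供的已修复内核)。以使用 apt 的发行版为例:
普通用户(通过 sudo 执行):
sudo apt update
sudo apt-get upgrade linux-image-generic
root 用户:
apt update
apt-get upgrade linux-image-generic
升级完成后需要重启系统,新内核才会生效。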
6. 参考资料
[1]https://www.openwall.com/lists/oss-security/2022/10/18/4
[2]https://blog.hacktivesecurity.com/index.php/2022/12/21/cve-2022-2602-dirtycred-file-exploitation-applied-on-an-io_uring-uaf/
原文始发于微信公众号(7号攻防实验室):Linux Kernel 本地权限提升漏洞(CVE-2022-2602) 漏洞分析报告
免责声明:文章中涉及的程序(方法)可能带有攻击性,仅供安全研究与教学之用,读者将其信息做其他用途,由读者承担全部法律及连带责任,本站不承担任何法律及连带责任;如有问题可邮件联系(建议使用企业邮箱或有效邮箱,避免邮件被拦截,联系方式见首页),望知悉。
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论