Linux kernel读取和写入用户态内存

阅读量: searchstar 2022-06-26 17:08:22
Categories: Tags:

copy_from_user和copy_to_user底层都是copy_user_generic。其主体部分是正常复制数据,但是访问用户态数据时,可能会出现page fault,这时硬件会自动跳转到内核提供的page fault handler do_page_fault,将page准备好之后再跳回原来的指令继续执行。但是如果page fault处理失败了,比如这不是一个valid address,那么内核会搜索这条指令对应的fixup point,如果找到了,就跳转到这个fixup point。

参考:https://www.kernel.org/doc/Documentation/x86/exception-tables.txt

copy_user_generic根据CPU的特性有三种实现:

/*
 * Dispatch a user-space copy to the best routine this CPU supports.
 * Returns the number of bytes NOT copied (0 on success) -- see the
 * "Output: eax" contract of the asm implementations below.
 *
 * to/from/len are passed in rdi/rsi/rdx via the "=D"/"=S"/"=d" outputs
 * paired with the "1"/"2"/"3" matching input constraints; the result
 * comes back in eax ("=a").
 */
static __always_inline __must_check unsigned long
copy_user_generic(void *to, const void *from, unsigned len)
{
unsigned ret;

/*
* If CPU has ERMS feature, use copy_user_enhanced_fast_string.
* Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
* Otherwise, use copy_user_generic_unrolled.
*/
/* alternative_call_2 patches the call target based on the two CPU
 * feature flags listed below. */
alternative_call_2(copy_user_generic_unrolled,
copy_user_generic_string,
X86_FEATURE_REP_GOOD,
copy_user_enhanced_fast_string,
X86_FEATURE_ERMS,
/* Outputs: eax = ret; rdi/rsi/rdx carry the args and are clobbered. */
ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
"=d" (len)),
/* Matching constraints: each input must live in the same register as
 * output operand 1, 2 or 3 (i.e. to, from, len above). */
"1" (to), "2" (from), "3" (len)
: "memory", "rcx", "r8", "r9", "r10", "r11");
return ret;
}

如果有ERMS(Enhanced REP MOVSB/STOSB instructions),就用copy_user_enhanced_fast_string;否则,如果CPU的rep指令性能较好(X86_FEATURE_REP_GOOD),就用copy_user_generic_string;否则用通用的copy_user_generic_unrolled。

内嵌汇编详解可参考:C语言内嵌汇编学习笔记。ASM_OUTPUT2中,=表示该操作数是只写的输出(读写的效果由下面的matching constraints实现),D表示di,作为第一个函数参数,S表示si,作为第二个函数参数,d表示dx,作为第三个函数参数,a表示ax,作为返回值。"1" (to), "2" (from), "3" (len)其实是Input Operands,其中1、2、3分别表示该输入操作数的物理位置必须与编号为1、2、3的输出操作数(即to、from、len)一致。"memory", "rcx", "r8", "r9", "r10", "r11"是Clobbers,表示内存、rcx、r8、r9、r10、r11的内容被修改了。

copy_user_enhanced_fast_string

现代CPU通常有ERMS,所以先看copy_user_enhanced_fast_string

/*
* Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
* It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_enhanced_fast_string)
// Set AC
ASM_STAC
cmpl $64,%edx
// .L_copy_short_string is defined inside copy_user_generic_unrolled.
jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
// Move the third argument (edx = byte count) into ecx, the counter
// register used by the rep prefix.
movl %edx,%ecx
// "rep movsb" acts as one instruction: copy ecx bytes from [rsi] to [rdi].
// Roughly: while (cx != 0) { *di++ = *si++; cx--; }
// See: https://stackoverflow.com/questions/43343231/enhanced-rep-movsb-for-memcpy
1: rep
movsb
// Success: clear the return value (eax = 0 uncopied bytes).
xorl %eax,%eax
// Clear AC
ASM_CLAC
ret

.section .fixup,"ax"
// On a fault, ecx holds the number of bytes not yet copied; move it to
// edx, the third argument of copy_user_handle_tail.
12: movl %ecx,%edx /* ecx is zerorest also */
// rdi and rsi are still the first and second arguments (dst and src).
jmp copy_user_handle_tail
.previous

// If instruction 1 (rep movsb) faults, jump to label 12 (the fixup code).
_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)

STAC: Set AC。CLAC: Clear AC。参考:https://www.felixcloutier.com/x86/stac

copy_user_handle_tail可以看作是尽量多复制一些字节,从而精准定位到出错的那个字节:

/*
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
*/
__visible unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len)
{
/* Retry the copy one byte at a time, stopping at the first byte that
 * faults, so the return value is exactly the uncopied byte count. */
for (; len; --len, to++) {
char c;

if (__get_user_nocheck(c, from++, sizeof(char)))
break;
if (__put_user_nocheck(c, to, sizeof(char)))
break;
}
/* Clear AC (the asm fast paths set it with ASM_STAC before jumping
 * here via their fixup sections). */
clac();
return len;
}

copy_user_generic_string

ENTRY(copy_user_generic_string)
// Set AC
ASM_STAC
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
// ecx = count / 8 (number of qwords), edx = count % 8 (tail bytes).
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
// Copy ecx qwords (8 bytes each) from [rsi] to [rdi].
1: rep
movsq
// Copy the remaining edx (< 8) bytes one at a time.
2: movl %edx,%ecx
3: rep
movsb
// Success: return value eax = 0 uncopied bytes.
xorl %eax,%eax
// Clear AC
ASM_CLAC
ret

.section .fixup,"ax"
// Fault in the qword loop: uncopied bytes = tail bytes (rdx) plus
// 8 bytes for every qword still to go (rcx * 8).
11: leal (%rdx,%rcx,8),%ecx
// Fault in the byte loop: ecx already holds the uncopied byte count.
12: movl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous

_ASM_EXTABLE_UA(1b, 11b)
_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)

与copy_user_enhanced_fast_string差不多,主要区别是先进行rep movsq,即先以8字节为单位做拷贝,再对剩余不足8字节的部分rep movsb,即逐字节拷贝。

copy_user_generic_unrolled

/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
* code for rep movsq
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_generic_unrolled)
// Set AC
ASM_STAC
cmpl $8,%edx
jb 20f /* less than 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
// ecx = count / 64 (number of 64-byte chunks), edx = count % 64.
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz .L_copy_short_string
// Unrolled main loop: each iteration moves one 64-byte chunk through
// r8-r11 -- load 4 qwords, store 4 qwords, twice.
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movq %r8,(%rdi)
6: movq %r9,1*8(%rdi)
7: movq %r10,2*8(%rdi)
8: movq %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movq %r8,4*8(%rdi)
14: movq %r9,5*8(%rdi)
15: movq %r10,6*8(%rdi)
16: movq %r11,7*8(%rdi)
// Advance both pointers by one chunk and loop until ecx chunks are done.
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
// Copy the remaining qwords: ecx = edx / 8, edx = edx % 8.
// (Also the entry point used by copy_user_enhanced_fast_string for
// copies shorter than 64 bytes.)
.L_copy_short_string:
movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
18: movq (%rsi),%r8
19: movq %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
// Copy the final edx (< 8) bytes one at a time.
20: andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
// Success: return value eax = 0 uncopied bytes.
23: xor %eax,%eax
// Clear AC
ASM_CLAC
ret

.section .fixup,"ax"
// Fault in the 64-byte loop: uncopied = remaining chunks * 64 + tail (edx).
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
// Fault in the qword loop: uncopied = tail bytes (rdx) + rcx * 8.
40: leal (%rdx,%rcx,8),%edx
jmp 60f
// Fault in the byte loop: ecx holds the uncopied byte count.
50: movl %ecx,%edx
60: jmp copy_user_handle_tail /* ecx is zerorest also */
.previous

// Map every potentially-faulting instruction label to the fixup entry
// that recomputes the uncopied byte count for that loop.
_ASM_EXTABLE_UA(1b, 30b)
_ASM_EXTABLE_UA(2b, 30b)
_ASM_EXTABLE_UA(3b, 30b)
_ASM_EXTABLE_UA(4b, 30b)
_ASM_EXTABLE_UA(5b, 30b)
_ASM_EXTABLE_UA(6b, 30b)
_ASM_EXTABLE_UA(7b, 30b)
_ASM_EXTABLE_UA(8b, 30b)
_ASM_EXTABLE_UA(9b, 30b)
_ASM_EXTABLE_UA(10b, 30b)
_ASM_EXTABLE_UA(11b, 30b)
_ASM_EXTABLE_UA(12b, 30b)
_ASM_EXTABLE_UA(13b, 30b)
_ASM_EXTABLE_UA(14b, 30b)
_ASM_EXTABLE_UA(15b, 30b)
_ASM_EXTABLE_UA(16b, 30b)
_ASM_EXTABLE_UA(18b, 40b)
_ASM_EXTABLE_UA(19b, 40b)
_ASM_EXTABLE_UA(21b, 50b)
_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)

就是一个做了循环展开的拷贝。