Both copy_from_user and copy_to_user come down to copy_user_generic. Its main body simply copies the data, but accessing user-space memory may raise a page fault, in which case the hardware jumps to the kernel's page fault handler, do_page_fault, which makes the page present and then returns to the faulting instruction to continue. If the fault cannot be handled, for example because the address is not valid, the kernel searches for the fixup point registered for that instruction and, if one is found, jumps to it.
Reference: https://www.kernel.org/doc/Documentation/x86/exception-tables.txt
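The fixup points live in the exception table described in that document: every instruction that may fault on a user address gets an entry pairing its address with a fixup address, and the page fault handler consults this table before declaring a kernel bug. Below is a simplified sketch of the idea only; the real kernel stores 32-bit relative offsets plus a handler field and binary-searches a sorted table, and search_fixup here is an invented name.

/* Simplified model of the x86 exception table, after
 * Documentation/x86/exception-tables.txt. Real entries use relative
 * offsets and a handler; this absolute-address form is illustrative only. */
struct exception_table_entry {
        unsigned long insn;     /* address of the instruction that may fault */
        unsigned long fixup;    /* where to resume if it does fault */
};

/* Called by the page fault handler when the fault cannot be fixed by
 * paging in the address: returns the fixup address, or 0 if the faulting
 * instruction has no entry (i.e. this is a genuine kernel bug). */
static unsigned long search_fixup(const struct exception_table_entry *table,
                                  unsigned n, unsigned long fault_ip)
{
        for (unsigned i = 0; i < n; i++)
                if (table[i].insn == fault_ip)
                        return table[i].fixup;
        return 0;
}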
copy_user_generic has three implementations, chosen according to the CPU's features:
static __always_inline __must_check unsigned long
copy_user_generic(void *to, const void *from, unsigned len)
{
        unsigned ret;

        /*
         * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
         * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
         * Otherwise, use copy_user_generic_unrolled.
         */
        alternative_call_2(copy_user_generic_unrolled,
                        copy_user_generic_string,
                        X86_FEATURE_REP_GOOD,
                        copy_user_enhanced_fast_string,
                        X86_FEATURE_ERMS,
                        ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
                                    "=d" (len)),
                        "1" (to), "2" (from), "3" (len)
                        : "memory", "rcx", "r8", "r9", "r10", "r11");
        return ret;
}
If the CPU has ERMS (Enhanced REP MOVSB/STOSB instructions), copy_user_enhanced_fast_string is used; otherwise, if its rep string instructions are known to be fast (X86_FEATURE_REP_GOOD), copy_user_generic_string is used; otherwise the generic copy_user_generic_unrolled is used.
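alternative_call_2 patches the call target into the code once at boot, so there is no branch at run time; logically, though, the selection is equivalent to the following sketch (kernel context assumed; boot_cpu_has and the three copy routines are real kernel symbols, while the wrapper itself is invented for illustration):

/* Logical equivalent of the alternative_call_2 above: pick the copy
 * routine by CPU feature. The real kernel rewrites the call site once
 * at boot instead of testing the feature bits on every call. */
static unsigned long
copy_user_generic_sketch(void *to, const void *from, unsigned len)
{
        if (boot_cpu_has(X86_FEATURE_ERMS))
                return copy_user_enhanced_fast_string(to, from, len);
        if (boot_cpu_has(X86_FEATURE_REP_GOOD))
                return copy_user_generic_string(to, from, len);
        return copy_user_generic_unrolled(to, from, len);
}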
For details on inline assembly, see the separate notes on C inline assembly (C语言内嵌汇编学习笔记). In ASM_OUTPUT2, "=" marks an output operand (strictly write-only; it is the matching input constraints below that make these registers effectively read-write). "a" means rax, which carries the return value; "D" means rdi, which holds the first function argument; "S" means rsi, the second argument; "d" means rdx, the third argument. "1" (to), "2" (from), "3" (len) are the input operands; the digits are matching constraints, requiring each input to be placed in the same location as output operand 1, 2 or 3 respectively (operands are numbered from 0, so these are "=D" (to), "=S" (from) and "=d" (len)). "memory", "rcx", "r8", "r9", "r10", "r11" are the clobbers, telling the compiler that memory and the registers rcx, r8, r9, r10 and r11 may be modified.
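To see these constraints outside the kernel, here is a small standalone userspace sketch (x86-64 with GCC or Clang; movsb_copy is an invented name, not a kernel function) that uses the same pattern of register-letter outputs plus matching-number inputs around a rep movsb:

#include <stdio.h>
#include <stddef.h>

/* Demo of the constraint style used above: "=D"/"=S"/"=c" name the
 * output registers rdi/rsi/rcx, and the matching constraints "0"/"1"/"2"
 * force each input into the same register as the corresponding output.
 * rep movsb then copies rcx bytes from [rsi] to [rdi]. */
static void *movsb_copy(void *dst, const void *src, size_t n)
{
        void *d;
        const void *s;
        size_t c;

        asm volatile("rep movsb"
                     : "=D" (d), "=S" (s), "=c" (c)    /* rdi, rsi, rcx after the copy */
                     : "0" (dst), "1" (src), "2" (n)   /* inputs share those registers */
                     : "memory");
        (void)d; (void)s; (void)c;      /* only the side effect matters here */
        return dst;
}

int main(void)
{
        char buf[16];

        movsb_copy(buf, "hello", 6);
        puts(buf);      /* prints "hello" */
        return 0;
}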
copy_user_enhanced_fast_string
Modern CPUs usually have ERMS, so let's look at copy_user_enhanced_fast_string first:
/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
// Set AC
ASM_STAC
cmpl $64,%edx
// .L_copy_short_string is defined in the copy_user_generic_unrolled implementation.
jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
// Move the third argument, edx, into ecx to serve as the counter for the rep instruction.
movl %edx,%ecx
// rep movsb can be viewed as a single instruction that copies cx bytes from where si points to where di points.
// Equivalent to: while (cx != 0) {*di++ = *si++; cx--}
// See: https://stackoverflow.com/questions/43343231/enhanced-rep-movsb-for-memcpy
1: rep
movsb
// Zero the return value, eax.
xorl %eax,%eax
// Clear AC
ASM_CLAC
ret

.section .fixup,"ax"
// At this point ecx holds the number of bytes that were not copied. Move it into edx,
// which becomes the third argument of copy_user_handle_tail.
12: movl %ecx,%edx /* ecx is zerorest also */
// rdi and rsi are already the first and second arguments.
jmp copy_user_handle_tail
.previous

// If instruction 1 (the rep movsb) faults, jump to instruction 12 (the fixup section).
_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
STAC sets the AC flag and CLAC clears it; with SMAP enabled, this is what temporarily allows the kernel to access user-mode pages. Reference: https://www.felixcloutier.com/x86/stac
copy_user_handle_tail can be seen as copying as many of the remaining bytes as possible, so that the fault is pinned down to the exact byte where it occurs:
/*
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
*/
__visible unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len)
{
        for (; len; --len, to++) {
                char c;

                if (__get_user_nocheck(c, from++, sizeof(char)))
                        break;
                if (__put_user_nocheck(c, to, sizeof(char)))
                        break;
        }
        clac();
        return len;
}
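This tail handling is also why copy_from_user and copy_to_user return the number of bytes that could not be copied (0 on success) rather than an error code. A typical caller only checks whether that number is non-zero; the ioctl handler below is a made-up example (my_cmd and my_dev_ioctl are hypothetical names), but the return-value convention is the real one:

#include <linux/fs.h>
#include <linux/uaccess.h>

/* Hypothetical caller: my_cmd and my_dev_ioctl are invented for this
 * example; copy_from_user's "bytes not copied" return value is real. */
struct my_cmd {
        int op;
        unsigned long arg;
};

static long my_dev_ioctl(struct file *file, unsigned int cmd, unsigned long uarg)
{
        struct my_cmd kcmd;

        /* Non-zero means some trailing bytes could not be copied, i.e. the
         * user pointer was (at least partly) invalid. */
        if (copy_from_user(&kcmd, (void __user *)uarg, sizeof(kcmd)))
                return -EFAULT;

        /* ... act on kcmd ... */
        return 0;
}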
copy_user_generic_string
ENTRY(copy_user_generic_string)
ASM_STAC
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
1: rep
movsq
2: movl %edx,%ecx
3: rep
movsb
xorl %eax,%eax
ASM_CLAC
ret
.section .fixup,"ax"
11: leal (%rdx,%rcx,8),%ecx
12: movl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous

_ASM_EXTABLE_UA(1b, 11b)
_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
Much like copy_user_enhanced_fast_string, except that it first runs rep movsq, copying in 8-byte units, and then rep movsb to copy the remaining bytes one at a time.
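Ignoring ALIGN_DESTINATION and the fault handling, the qword/byte split corresponds to the following C sketch (split_copy is an invented name for illustration):

#include <string.h>

/* C sketch of copy_user_generic_string's two stages: 8-byte copies for
 * len/8 quadwords (rep movsq), then single bytes for len%8 (rep movsb). */
static void split_copy(unsigned char *to, const unsigned char *from, unsigned len)
{
        unsigned nquads = len >> 3;     /* shrl $3,%ecx */
        unsigned tail   = len & 7;      /* andl $7,%edx */

        while (nquads--) {              /* 1: rep movsq */
                memcpy(to, from, 8);
                to += 8;
                from += 8;
        }
        while (tail--)                  /* 3: rep movsb */
                *to++ = *from++;
}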
copy_user_generic_unrolled
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
* code for rep movsq
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_generic_unrolled)
ASM_STAC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz .L_copy_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movq %r8,(%rdi)
6: movq %r9,1*8(%rdi)
7: movq %r10,2*8(%rdi)
8: movq %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movq %r8,4*8(%rdi)
14: movq %r9,5*8(%rdi)
15: movq %r10,6*8(%rdi)
16: movq %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
.L_copy_short_string:
movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
18: movq (%rsi),%r8
19: movq %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
20: andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
23: xor %eax,%eax
ASM_CLAC
ret
.section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
40: leal (%rdx,%rcx,8),%edx
jmp 60f
50: movl %ecx,%edx
60: jmp copy_user_handle_tail /* ecx is zerorest also */
.previous
_ASM_EXTABLE_UA(1b, 30b)
_ASM_EXTABLE_UA(2b, 30b)
_ASM_EXTABLE_UA(3b, 30b)
_ASM_EXTABLE_UA(4b, 30b)
_ASM_EXTABLE_UA(5b, 30b)
_ASM_EXTABLE_UA(6b, 30b)
_ASM_EXTABLE_UA(7b, 30b)
_ASM_EXTABLE_UA(8b, 30b)
_ASM_EXTABLE_UA(9b, 30b)
_ASM_EXTABLE_UA(10b, 30b)
_ASM_EXTABLE_UA(11b, 30b)
_ASM_EXTABLE_UA(12b, 30b)
_ASM_EXTABLE_UA(13b, 30b)
_ASM_EXTABLE_UA(14b, 30b)
_ASM_EXTABLE_UA(15b, 30b)
_ASM_EXTABLE_UA(16b, 30b)
_ASM_EXTABLE_UA(18b, 40b)
_ASM_EXTABLE_UA(19b, 40b)
_ASM_EXTABLE_UA(21b, 50b)
_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
It is simply a copy with the loop unrolled.
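The C equivalent of that unrolling, again with alignment and fault handling stripped out (unrolled_copy is an invented name for illustration), moves 64 bytes per iteration through four temporaries, then falls back to 8-byte and finally single-byte copies:

#include <string.h>

/* C sketch of copy_user_generic_unrolled's three stages. Each 64-byte
 * block is moved as two groups of four 8-byte loads and four 8-byte
 * stores, mirroring the r8-r11 usage in the assembly. */
static void unrolled_copy(unsigned char *to, const unsigned char *from, unsigned len)
{
        unsigned nblocks = len >> 6;    /* shrl $6,%ecx: 64-byte blocks */
        unsigned rest    = len & 63;    /* andl $63,%edx */

        while (nblocks--) {
                unsigned long long r8, r9, r10, r11;

                memcpy(&r8,  from + 0 * 8, 8);  /* labels 1-4: four loads */
                memcpy(&r9,  from + 1 * 8, 8);
                memcpy(&r10, from + 2 * 8, 8);
                memcpy(&r11, from + 3 * 8, 8);
                memcpy(to + 0 * 8, &r8,  8);    /* labels 5-8: four stores */
                memcpy(to + 1 * 8, &r9,  8);
                memcpy(to + 2 * 8, &r10, 8);
                memcpy(to + 3 * 8, &r11, 8);
                memcpy(&r8,  from + 4 * 8, 8);  /* labels 9-12 */
                memcpy(&r9,  from + 5 * 8, 8);
                memcpy(&r10, from + 6 * 8, 8);
                memcpy(&r11, from + 7 * 8, 8);
                memcpy(to + 4 * 8, &r8,  8);    /* labels 13-16 */
                memcpy(to + 5 * 8, &r9,  8);
                memcpy(to + 6 * 8, &r10, 8);
                memcpy(to + 7 * 8, &r11, 8);
                to += 64;
                from += 64;
        }

        /* .L_copy_short_string: 8 bytes at a time (labels 18-19) */
        for (unsigned q = rest >> 3; q; q--) {
                memcpy(to, from, 8);
                to += 8;
                from += 8;
        }

        /* byte loop (labels 21-22) */
        for (unsigned b = rest & 7; b; b--)
                *to++ = *from++;
}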