소스 검색

x86_64/memset: simple optimizations

"and $0xff,%esi" is a six-byte insn (81 e6 ff 00 00 00), can use
4-byte "movzbl %sil,%esi" (40 0f b6 f6) instead.

64-bit imul is slow, move it as far up as possible so that the result
(rax) has more time to be ready by the time we start using it
in mem stores.

There is no need to shuffle registers in preparation for "rep stos"
if we are not going to take that code path. Thus, the patch moves
the "jump if len < 16" instructions up, and changes the alternate code
path to use rdx and rdi instead of rcx and r8.

Signed-off-by: Denys Vlasenko <[email protected]>
Denys Vlasenko 10 년 전
부모
커밋
bf2071eda3
1 changed file with 16 additions and 14 deletions
  1. 16 14
      src/string/x86_64/memset.s

+ 16 - 14
src/string/x86_64/memset.s

@@ -1,41 +1,43 @@
 .global memset
 .type memset,@function
 memset:
-	and $0xff,%esi
+	movzbl %sil,%esi
 	mov $0x101010101010101,%rax
-	mov %rdx,%rcx
-	mov %rdi,%r8
+	# 64-bit imul has 3-7 cycles latency, launch early
 	imul %rsi,%rax
-	cmp $16,%rcx
+
+	cmp $16,%rdx
 	jb 1f
 
-	mov %rax,-8(%rdi,%rcx)
+	mov %rdx,%rcx
+	mov %rdi,%r8
 	shr $3,%rcx
+	mov %rax,-8(%rdi,%rdx)
 	rep
 	stosq
 	mov %r8,%rax
 	ret
 
-1:	test %ecx,%ecx
+1:	test %edx,%edx
 	jz 1f
 
 	mov %al,(%rdi)
-	mov %al,-1(%rdi,%rcx)
-	cmp $2,%ecx
+	mov %al,-1(%rdi,%rdx)
+	cmp $2,%edx
 	jbe 1f
 
 	mov %al,1(%rdi)
-	mov %al,-2(%rdi,%rcx)
-	cmp $4,%ecx
+	mov %al,-2(%rdi,%rdx)
+	cmp $4,%edx
 	jbe 1f
 
 	mov %eax,(%rdi)
-	mov %eax,-4(%rdi,%rcx)
-	cmp $8,%ecx
+	mov %eax,-4(%rdi,%rdx)
+	cmp $8,%edx
 	jbe 1f
 
 	mov %eax,4(%rdi)
-	mov %eax,-8(%rdi,%rcx)
+	mov %eax,-8(%rdi,%rdx)
 
-1:	mov %r8,%rax
+1:	mov %rdi,%rax
 	ret