
add big-endian support to ARM assembler memcpy

Allow the existing ARM assembler memcpy implementation to be used for
both big and little endian targets.
Andre McCurdy, 5 years ago
commit 9dce93ac7f
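
The change itself: when source and destination are not mutually word-aligned, the copy loads aligned words and stitches each pair of neighbouring loads together with two shifts. On little endian the earlier bytes of the stream sit in a word's low-order bits, so the carried part is shifted right and the new load shifted left; on big endian the directions mirror, which is all the #if __ARMEB__ blocks below select. A minimal C sketch of the idea (illustrative only, not the musl code; off is the source's byte offset within a word, r and l correspond to r12 and lr in the assembly):

	#include <stdint.h>

	static uint32_t stitch(uint32_t prev, uint32_t next, unsigned off)
	{
		unsigned r = 8 * off;   /* r12 in the assembly */
		unsigned l = 32 - r;    /* lr in the assembly  */
	#if __ARMEB__
		/* big endian: earlier bytes occupy the high-order bits */
		return (prev << r) | (next >> l);
	#else
		/* little endian: earlier bytes occupy the low-order bits */
		return (prev >> r) | (next << l);
	#endif
	}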

COPYRIGHT (+1 -1)

@@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
 and labelled as such in comments in the individual source files. All
 have been licensed under extremely permissive terms.
 
-The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
 The Android Open Source Project and is licensed under a two-clause BSD
 license. It was taken from Bionic libc, used on Android.
 

src/string/arm/memcpy_le.S → src/string/arm/memcpy.S (+97 -4)

@@ -1,5 +1,3 @@
-#if !__ARMEB__
-
 /*
  * Copyright (C) 2008 The Android Open Source Project
  * All rights reserved.
@@ -42,7 +40,7 @@
  * code safely callable from thumb mode, adjusting the return
  * instructions to be compatible with pre-thumb ARM cpus, removal of
  * prefetch code that is not compatible with older cpus and support for
- * building as thumb 2.
+ * building as thumb 2 and big-endian.
  */
 
 .syntax unified
@@ -227,24 +225,45 @@ non_congruent:
 	 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
 	 */
 	movs    r5, r5, lsl #31
+
+#if __ARMEB__
+	movmi   r3, r3, ror #24
+	strbmi	r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs	r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs	r3, [r0], #1
+#else
 	strbmi r3, [r0], #1
 	movmi   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
+#endif
 
 	cmp     r2, #4
 	blo     partial_word_tail
 
+#if __ARMEB__
+	mov	r3, r3, lsr r12
+	mov	r3, r3, lsl r12
+#endif
+
 	/* Align destination to 32 bytes (cache line boundary) */
 1:      tst     r0, #0x1c
 	beq     2f
 	ldr     r5, [r1], #4
 	sub     r2, r2, #4
+#if __ARMEB__
+	mov     r4, r5,                 lsr lr
+	orr     r4, r4, r3
+	mov     r3, r5,                 lsl r12
+#else
 	mov     r4, r5,                 lsl lr
 	orr     r4, r4, r3
 	mov     r3, r5,                 lsr r12
+#endif
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
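
Two big-endian details in the hunk above deserve a note. The head bytes: on little endian the next byte to store is already in bits 7..0 of r3, so the code stores first and shifts right afterwards; on big endian it sits in bits 31..24, so each store is preceded by a ror #24, which both exposes that byte to strb and rotates the following byte into position (the same sequence reappears under partial_word_tail). And before entering the word loop, the big-endian path clears the low-order bits of the carry with the lsr r12 / lsl r12 pair, so bytes already consumed by the rotations cannot leak into the first stitched word. A hypothetical trace of the rotate trick, assuming r3 = 0xAABBCCDD (0xAA being the earliest byte in memory on big endian):

	r3 = (r3 >> 24) | (r3 << 8);   /* ror #24 -> r3 = 0xBBCCDDAA */
	/* strb stores the low byte 0xAA; a further ror #24 gives
	 * 0xCCDDAABB, queueing 0xBB for the next conditional store */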
@@ -270,6 +289,25 @@ loop16:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #16
+	mov     r4, r4, lsl #16
+	orr     r4, r4, r5, lsr #16
+	mov     r5, r5, lsl #16
+	orr     r5, r5, r6, lsr #16
+	mov     r6, r6, lsl #16
+	orr     r6, r6, r7, lsr #16
+	mov     r7, r7, lsl #16
+	orr     r7, r7, r8, lsr #16
+	mov     r8, r8, lsl #16
+	orr     r8, r8, r9, lsr #16
+	mov     r9, r9, lsl #16
+	orr     r9, r9, r10, lsr #16
+	mov     r10, r10,               lsl #16
+	orr     r10, r10, r11, lsr #16
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #16
+#else
 	orr     r3, r3, r4, lsl #16
 	mov     r4, r4, lsr #16
 	orr     r4, r4, r5, lsl #16
@@ -287,6 +325,7 @@ loop16:
 	orr     r10, r10, r11, lsl #16
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #16
+#endif
 	bhs     1b
 	b       less_than_thirtytwo
 
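
The three unrolled 32-byte loops differ only in the stitch distance. loop16 above handles the half-word-offset case: on big endian each output word is (prev << 16) | (next >> 16), and loop8/loop24 below repeat the pattern with 8- and 24-bit shifts, every shift the mirror image of its little-endian counterpart. A worked half-word example with hypothetical stream bytes b0..b5, where b0 starts two bytes into a source word:

	/* big endian, offset 2:  prev = xx xx b0 b1   next = b2 b3 b4 b5
	 * (prev << 16) | (next >> 16) = b0 b1 b2 b3  -- one aligned output
	 * word, while next << 16 carries b4 b5 forward as the new prev   */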
@@ -296,6 +335,25 @@ loop8:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #24
+	mov     r4, r4, lsl #8
+	orr     r4, r4, r5, lsr #24
+	mov     r5, r5, lsl #8
+	orr     r5, r5, r6, lsr #24
+	mov     r6, r6,  lsl #8
+	orr     r6, r6, r7, lsr #24
+	mov     r7, r7,  lsl #8
+	orr     r7, r7, r8,             lsr #24
+	mov     r8, r8,  lsl #8
+	orr     r8, r8, r9,             lsr #24
+	mov     r9, r9,  lsl #8
+	orr     r9, r9, r10,    lsr #24
+	mov     r10, r10, lsl #8
+	orr     r10, r10, r11,  lsr #24
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #8
+#else
 	orr     r3, r3, r4, lsl #24
 	mov     r4, r4, lsr #8
 	orr     r4, r4, r5, lsl #24
@@ -313,6 +371,7 @@ loop8:
 	orr     r10, r10, r11,  lsl #24
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #8
+#endif
 	bhs     1b
 	b       less_than_thirtytwo
 
@@ -322,6 +381,25 @@ loop24:
 	ldmia   r1!, {   r5,r6,r7,  r8,r9,r10,r11}
 	subs    r2, r2, #32
 	ldrhs   r12, [r1], #4
+#if __ARMEB__
+	orr     r3, r3, r4, lsr #8
+	mov     r4, r4, lsl #24
+	orr     r4, r4, r5, lsr #8
+	mov     r5, r5, lsl #24
+	orr     r5, r5, r6, lsr #8
+	mov     r6, r6, lsl #24
+	orr     r6, r6, r7, lsr #8
+	mov     r7, r7, lsl #24
+	orr     r7, r7, r8, lsr #8
+	mov     r8, r8, lsl #24
+	orr     r8, r8, r9, lsr #8
+	mov     r9, r9, lsl #24
+	orr     r9, r9, r10, lsr #8
+	mov     r10, r10, lsl #24
+	orr     r10, r10, r11, lsr #8
+	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
+	mov     r3, r11, lsl #24
+#else
 	orr     r3, r3, r4, lsl #8
 	mov     r4, r4, lsr #24
 	orr     r4, r4, r5, lsl #8
@@ -339,6 +417,7 @@ loop24:
 	orr     r10, r10, r11, lsl #8
 	stmia   r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
 	mov     r3, r11, lsr #24
+#endif
 	bhs     1b
 
 less_than_thirtytwo:
@@ -350,9 +429,15 @@ less_than_thirtytwo:
 
 1:      ldr     r5, [r1], #4
 	sub     r2, r2, #4
+#if __ARMEB__
+	mov     r4, r5,                 lsr lr
+	orr     r4, r4, r3
+	mov     r3,     r5,                     lsl r12
+#else
 	mov     r4, r5,                 lsl lr
 	orr     r4, r4, r3
 	mov     r3,     r5,                     lsr r12
+#endif
 	str     r4, [r0], #4
 	cmp     r2, #4
 	bhs     1b
@@ -360,11 +445,20 @@ less_than_thirtytwo:
 partial_word_tail:
 	/* we have a partial word in the input buffer */
 	movs    r5, lr, lsl #(31-3)
+#if __ARMEB__
+	movmi   r3, r3, ror #24
+	strbmi r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs r3, [r0], #1
+	movcs   r3, r3, ror #24
+	strbcs r3, [r0], #1
+#else
 	strbmi r3, [r0], #1
 	movmi   r3, r3, lsr #8
 	strbcs r3, [r0], #1
 	movcs   r3, r3, lsr #8
 	strbcs r3, [r0], #1
+#endif
 
 	/* Refill spilled registers from the stack. Don't update sp. */
 	ldmfd   sp, {r5-r11}
@@ -383,4 +477,3 @@ copy_last_3_and_return:
 	ldmfd   sp!, {r0, r4, lr}
 	bx      lr
 
-#endif

src/string/arm/memcpy.c (+0 -3)

@@ -1,3 +0,0 @@
-#if __ARMEB__
-#include "../memcpy.c"
-#endif
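
With the assembler routine now endian-clean, the C shim above loses its purpose: previously memcpy_le.S compiled to nothing on big-endian targets behind its !__ARMEB__ guard, and this file pulled in the generic ../memcpy.c as the fallback. After the rename to memcpy.S, both byte orders get the assembler implementation, so the fallback file is simply deleted.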