ARM support, optimize popcount fn

Change-Id: Iec02d0b5973721a3943b9c23ced3afc721cd3753
This commit is contained in:
William Wilgus 2022-03-10 15:48:51 -05:00 committed by William Wilgus
parent eecf840989
commit 376ffbcf9a

View file

@ -705,6 +705,9 @@ __aeabi_idivmod:
/*
* int __popcountsi2(unsigned int x)
* int __popcountdi2(unsigned long x)
* x = x - ((x >> 1) & 0x55555555);
* x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
* c = (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
*/
.section .text.__popcountsi2, "ax", %progbits
.global __popcountsi2
@ -712,23 +715,25 @@ __aeabi_idivmod:
.global __popcountdi2
.type __popcountdi2, %function
.set __popcountdi2, __popcountsi2
__popcountsi2:
    @ Population count of a 32-bit word, computed with parallel bit sums.
    @   In:       r0 = x
    @   Out:      r0 = number of set bits in x (0..32)
    @   Clobbers: r2, r3 (no stack use; no flag-setting instructions)
    @
    @ The superseded mov/orr constant-building sequence that used to sit in
    @ front of this code ended in its own "bx lr", which made everything
    @ after it unreachable.  Only the literal-pool version is kept.
    ldr     r2, .L2                 @ r2 = 0x55555555
    ldr     r3, .L2+4               @ r3 = 0x33333333
    and     r2, r2, r0, lsr #1      @ r2 = (x >> 1) & 0x55555555
    rsb     r2, r2, r0              @ r2 = x - ((x >> 1) & 0x55555555)
                                    @      each 2-bit field now holds its own bit count
    and     r0, r2, r3              @ r0 = r2 & 0x33333333
    and     r3, r3, r2, lsr #2      @ r3 = (r2 >> 2) & 0x33333333
    add     r0, r0, r3              @ r0 = per-nibble bit counts (0..4 each)
    ldr     r3, .L2+8               @ r3 = 0x0F0F0F0F
    add     r0, r0, r0, lsr #4      @ r0 = r0 + (r0 >> 4)
    and     r3, r0, r3              @ r3 = per-byte bit counts (0..8 each)
    @ The next two adds are r3 * 0x01010101 done with shifts: the top byte
    @ ends up holding the sum of all four byte counts (at most 32, so no
    @ carry is lost out of bit 31).
    add     r3, r3, r3, asl #8      @ r3 += r3 << 8
    add     r3, r3, r3, asl #16     @ r3 += r3 << 16
    mov     r0, r3, lsr #24         @ r0 = top byte = popcount(x)
    bx      lr                      @ return r0
.L2:                                @ literal pool, read PC-relative by the ldr's above
    .word   0x55555555
    .word   0x33333333
    .word   0x0F0F0F0F
    .size __popcountsi2, .-__popcountsi2