From 376ffbcf9aabae6f47d62ba4734ae1bb230ebce3 Mon Sep 17 00:00:00 2001
From: William Wilgus
Date: Thu, 10 Mar 2022 15:48:51 -0500
Subject: [PATCH] ARM support, optimize popcount fn

Change-Id: Iec02d0b5973721a3943b9c23ced3afc721cd3753
---
 lib/arm_support/support-arm.S | 43 +++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/lib/arm_support/support-arm.S b/lib/arm_support/support-arm.S
index 442a629fca..f99d086b0b 100644
--- a/lib/arm_support/support-arm.S
+++ b/lib/arm_support/support-arm.S
@@ -705,6 +705,9 @@ __aeabi_idivmod:
 /*
  * int __popcountsi2(unsigned int x)
  * int __popcountdi2(unsigned long x)
+ * x = x - ((x >> 1) & 0x55555555);                          (each 2-bit field = its bit count)
+ * x = (x & 0x33333333) + ((x >> 2) & 0x33333333);           (each 4-bit field = its bit count)
+ * c = (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;   (sum the four per-byte counts)
  */
     .section .text.__popcountsi2, "ax", %progbits
     .global __popcountsi2
@@ -712,23 +715,25 @@ __aeabi_idivmod:
     .global __popcountdi2
     .type __popcountdi2, %function
     .set __popcountdi2, __popcountsi2
+    @ NOTE(review): __popcountdi2 is an alias of this routine, which reads only r0 - confirm no caller passes a 64-bit value
 __popcountsi2:
-    mov     r1, #0x33               @ r1 = 0x33333333
-    orr     r1, r1, r1, lsl #8      @ ...
-    orr     r1, r1, r1, lsl #16     @ ...
-    eor     r2, r1, r1, lsl #1      @ r2 = 0x55555555
-    and     r2, r2, r0, lsr #1      @ r2 = (x >> 1) & 0x55555555
-    sub     r0, r0, r2              @ x = x - ((x >> 1) & 0x55555555)
-    and     r2, r1, r0              @ r2 = x & 0x33333333
-    and     r1, r1, r0, lsr #2      @ r1 = (x >> 2) & 0x33333333
-    add     r0, r2, r1              @ x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
-    mov     r1, #0x0f               @ r1 = 0x0f0f0f0f
-    orr     r1, r1, r1, lsl #8      @ ...
-    orr     r1, r1, r1, lsl #16     @ ...
-    add     r0, r0, lsr #4          @ x = x + (x >> 4)
-    and     r0, r0, r1              @ x = (x + (x >> 4)) & 0x0f0f0f0f
-    add     r0, r0, lsr #16         @ x = x + (x >> 16)
-    add     r0, r0, lsr #8          @ x = x + (x >> 8)
-    and     r0, r0, #0x3f           @ x &= 0x3f
-    bx      lr                      @ return x
-    .size __popcountsi2, .-__popcountsi2
+    ldr     r2, .L2                 @ r2 = 0x55555555 (first word of the pool at .L2)
+    ldr     r3, .L2+4               @ r3 = 0x33333333 (second word of the pool)
+    and     r2, r2, r0, lsr #1      @ r2 = (x >> 1) & 0x55555555
+    rsb     r2, r2, r0              @ r2 = x - r2; each 2-bit field now holds its own bit count
+    and     r0, r2, r3              @ r0 = r2 & 0x33333333 (even 2-bit fields)
+    and     r3, r3, r2, lsr #2      @ r3 = (r2 >> 2) & 0x33333333 (odd 2-bit fields)
+    add     r0, r0, r3              @ r0 = per-nibble bit counts
+    ldr     r3, .L2+8               @ r3 = 0x0F0F0F0F (third word of the pool)
+    add     r0, r0, r0, lsr #4      @ r0 = r0 + (r0 >> 4); low nibble of each byte = byte's count
+    and     r3, r0, r3              @ r3 = r0 & 0x0F0F0F0F; per-byte counts, each 0..8
+    add     r3, r3, r3, asl #8      @ r3 += r3 << 8   (r3 * 0x101)
+    add     r3, r3, r3, asl #16     @ r3 += r3 << 16  (overall * 0x01010101; top byte = sum of all bytes)
+    mov     r0, r3, lsr #24         @ return r3 >> 24, the total bit count
+    bx      lr                      @ return to caller, result in r0
+.L2:                                @ mask constants read by the pc-relative ldr instructions above
+    .word   0x55555555              @ every other bit
+    .word   0x33333333              @ low two bits of each nibble
+    .word   0xF0F0F0F               @ 0x0F0F0F0F: low nibble of each byte
+    .size __popcountsi2, .-__popcountsi2 @ taken after the pool, so the size includes the three words
+