f40bfc9267
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97 Reviewed-on: http://gerrit.rockbox.org/137 Reviewed-by: Nils Wallménius <nils@rockbox.org> Tested-by: Nils Wallménius <nils@rockbox.org>
535 lines
16 KiB
ArmAsm
535 lines
16 KiB
ArmAsm
/***************************************************************************
|
|
* __________ __ ___.
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
* \/ \/ \/ \/ \/
|
|
* $Id$
|
|
*
|
|
* Copyright (C) 2005 by Thom Johansen
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
* KIND, either express or implied.
|
|
*
|
|
****************************************************************************/
|
|
|
|
/* The following are assembler optimised versions of the LPC filtering
   routines needed for FLAC decoding. They are optimised for use with the
   MCF5249 processor, or any other similar ColdFire core with the EMAC unit.
 */
|
|
|
|
/* This routine deals with sample widths 16 and lower. All LPC filtering up to
   order 10 is done in specially optimised unrolled loops, while every order
   above this is handled by a slower default routine.

   The five arguments are read from the stack in this order:
   blocksize, qlevel, pred_order, data, coeffs.
   d2-d7/a2-a6 are saved on entry and restored again at .exit.
 */
    .section .icode,"ax",@progbits
    .global lpc_decode_emac
    .align 2
lpc_decode_emac:
    lea.l (-44, %sp), %sp               | 11 registers * 4 bytes of save area
    movem.l %d2-%d7/%a2-%a6, (%sp)
    movem.l (44+4, %sp), %d0-%d2/%a0-%a1 | arguments sit above save area + return address
    /* d0 = blocksize, d1 = qlevel, d2 = pred_order
       a0 = data, a1 = coeffs
     */

    /* the data pointer always lags behind history pointer by 'pred_order'
       samples. since we have one loop for each order, we can hard code this
       and free a register by not saving data pointer.
     */
    move.l %d2, %d3
    neg.l %d3
    lea.l (%a0, %d3.l*4), %a0           | history
    clr.l %d3
    move.l %d3, %macsr                  | we'll need integer mode for this
    tst.l %d0
    jeq .exit                           | zero samples to process, exit
    moveq.l #10, %d3
    cmp.l %d3, %d2
    jgt .default                        | order is over 10, jump to default case
    jmp.l (2, %pc, %d2.l*4)             | jump to loop corresponding to pred_order
|   jumptable: one bra.w per order, indexed by pred_order with a scale of 4.
|   Do not insert anything between the jmp and the table, or between the
|   table and the .order10 code that follows it.
    bra.w .exit                         | zero order filter isn't possible, exit function
    bra.w .order1
    bra.w .order2
    bra.w .order3
    bra.w .order4
    bra.w .order5
    bra.w .order6
    bra.w .order7
    bra.w .order8
    bra.w .order9
|
|
|
| last jump table entry coincides with target, so leave it out
|
|
.order10:
    /* Order 10 filter. This code has to stay directly behind the dispatch
       table: the table carries no entry for order 10 and the computed jump
       lands on this address instead.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d3-%d7/%a1-%a5             | ten coefs: d3 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order10_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0       | multiply-accumulate, fetch next sample
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (%a0)+, %a6, %acc0
    mac.l    %a6, %d6, (%a0)+, %a6, %acc0
    mac.l    %a6, %d5, (%a0)+, %a6, %acc0
    mac.l    %a6, %d4, (%a0)+, %a6, %acc0
    mac.l    %a6, %d3, (-9*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    lea.l    (-8*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0                            | one sample done
    jne      .order10_loop
    jra      .exit
|
|
|
|
.order9:
    /* Order 9 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d4-%d7/%a1-%a5             | nine coefs: d4 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order9_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (%a0)+, %a6, %acc0
    mac.l    %a6, %d6, (%a0)+, %a6, %acc0
    mac.l    %a6, %d5, (%a0)+, %a6, %acc0
    mac.l    %a6, %d4, (-8*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    lea.l    (-7*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .order9_loop
    jra      .exit
|
|
|
|
.order8:
    /* Order 8 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d5-%d7/%a1-%a5             | eight coefs: d5 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order8_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (%a0)+, %a6, %acc0
    mac.l    %a6, %d6, (%a0)+, %a6, %acc0
    mac.l    %a6, %d5, (-7*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    lea.l    (-6*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .order8_loop
    jra      .exit
|
|
|
|
.order7:
    /* Order 7 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d6-%d7/%a1-%a5             | seven coefs: d6 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order7_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (%a0)+, %a6, %acc0
    mac.l    %a6, %d6, (-6*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    lea.l    (-5*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .order7_loop
    jra      .exit
|
|
|
|
.order6:
    /* Order 6 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d7/%a1-%a5                 | six coefs: d7 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order6_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (-5*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    lea.l    (-4*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .order6_loop
    jra      .exit
|
|
|
|
.order5:
    /* Order 5 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a1-%a5                     | five coefs: a1 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order5_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (-4*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    lea.l    (-3*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .order5_loop
    jra      .exit
|
|
|
|
.order4:
    /* Order 4 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a2-%a5                     | four coefs: a2 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order4_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (-3*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    subq.l   #8, %a0                            | back two words: second sample of next window
    subq.l   #1, %d0
    jne      .order4_loop
    jra      .exit
|
|
|
|
.order3:
    /* Order 3 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a3-%a5                     | three coefs: a3 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order3_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (-2*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    subq.l   #4, %a0                            | back one word: second sample of next window
    subq.l   #1, %d0
    jne      .order3_loop
    jra      .exit
|
|
|
|
.order2:
    /* Order 2 filter.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a4-%a5                     | two coefs: a4 = first, a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.order2_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0       | older sample; a6 = newer sample
    mac.l    %a6, %a4, %acc0                    | no reload: a6 is already the next pass' first sample
    movclr.l %acc0, %d2                         | fetch the sum
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction; a0 needs no rewind here
    subq.l   #1, %d0
    jne      .order2_loop
    jra      .exit
|
|
|
|
.order1:
    /* Order 1 filter. With a single tap the EMAC buys nothing, so an
       ordinary multiply is used.
       d0 = samples left, d1 = qlevel, a0 = history, a1 = coeffs. */
    move.l   (%a1), %a5                         | a5 = the only coefficient
.order1_loop:
    move.l   %a5, %d2
    muls.l   (%a0)+, %d2                        | d2 = coef * previous sample
    asr.l    %d1, %d2                           | scale it down by qlevel bits
    add.l    %d2, (%a0)                         | residual + prediction, stored in place
    subq.l   #1, %d0
    jne      .order1_loop
    jra      .exit
|
|
|
|
.default:
    /* we do the filtering in an unrolled by 4 loop as far as we can, and then
       do the rest by jump table. */
    /* Generic path for pred_order above 10. The whole block runs once per
       output sample (the final branch loops back to .default), so the
       working pointers are rebuilt for every sample.
       d0 = samples left, d1 = qlevel, d2 = pred_order,
       a0 = start of current history window, a1 = coeffs,
       a2 = coef pointer (walks backwards), a3 = history pointer (walks
       forwards), a5 = history sample currently being multiplied. */
    lea.l (%a1, %d2.l*4), %a2           | need to start in the other end of coefs
    move.l %a0, %a3                     | working copy of history pointer
    move.l %d2, %d3
    lsr.l #2, %d3                       | coefs/4, num of iterations needed in next loop
    move.l (%a3)+, %a5                  | preload data for loop
1:
    lea.l (-4*4, %a2), %a2              | move lpc coef pointer four samples backwards
    movem.l (%a2), %d4-%d7              | load four coefs
    mac.l %a5, %d7, (%a3)+, %a5, %acc0
    mac.l %a5, %d6, (%a3)+, %a5, %acc0
    mac.l %a5, %d5, (%a3)+, %a5, %acc0
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
    subq.l #1, %d3                      | any more unrolled loop operations left?
    jne 1b

    moveq.l #3, %d3                     | mask 0x00000003
    and.l %d2, %d3                      | get the remaining samples to be filtered
    jmp.l (2, %pc, %d3*2)               | then jump into mac.l chain
|   jumptable: one bra.b per remainder value, indexed with a scale of 2.
|   A remainder of three has no entry and lands on the code right behind
|   the table, so nothing may be inserted in between.
    bra.b 3f                            | none left
    bra.b 2f                            | one left
    bra.b 1f                            | two left
|   three left
    move.l -(%a2), %d4
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
1:
    move.l -(%a2), %d4
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
2:
    move.l -(%a2), %d4
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
3:
    movclr.l %acc0, %d3                 | get result
    asr.l %d1, %d3                      | shift qlevel bits right
    add.l %a5, %d3                      | add residual, which is in a5 by now
    move.l %d3, -(%a3)                  | save, a3 is also one past save location
    addq.l #4, %a0                      | increment history pointer
    subq.l #1, %d0                      | decrement sample count
    jne .default                        | are we done?
    jra .exit                           | if so, jump to the shared exit code
|
|
|
|
|
|
/* This routine deals with sample widths 24 and lower. All LPC filtering up to
   order 8 is done in specially optimised unrolled loops, while every order
   above this is handled by a slower default routine.

   The five arguments are read from the stack in this order:
   blocksize, qlevel, pred_order, data, coeffs.
   d2-d7/a2-a6 are saved on entry and restored again at .exit.
   Unlike lpc_decode_emac, the sum is read back as acc0 plus its extension
   bits, which is why d2 is preloaded with 32 - qlevel before dispatching.
 */
    .global lpc_decode_emac_wide
    .align 2
lpc_decode_emac_wide:
    lea.l (-44, %sp), %sp               | 11 registers * 4 bytes of save area
    movem.l %d2-%d7/%a2-%a6, (%sp)
    movem.l (44+4, %sp), %d0-%d1/%d3/%a0-%a1 | arguments sit above save area + return address
    /* d0 = blocksize, d1 = qlevel, d3 = pred_order
       a0 = data, a1 = coeffs
     */

    /* the data pointer always lags behind history pointer by 'pred_order'
       samples. since we have one loop for each order, we can hard code this
       and free a register by not saving data pointer.
     */
    move.l %d3, %d2
    neg.l %d2
    lea.l (%a0, %d2.l*4), %a0           | history
    clr.l %d2
    move.l %d2, %macsr                  | we'll need integer mode for this
    tst.l %d0
    jeq .exit                           | zero samples to process, exit
    moveq.l #32, %d2
    sub.l %d1, %d2                      | calculate shift amount for extension byte
    moveq.l #8, %d4
    cmp.l %d4, %d3
    jgt .wdefault                       | order is over 8, jump to default case
    jmp.l (2, %pc, %d3.l*4)             | jump to loop corresponding to pred_order
|   jumptable: one bra.w per order, indexed by pred_order with a scale of 4.
|   Do not insert anything between the jmp and the table, or between the
|   table and the .worder8 code that follows it.
    bra.w .exit                         | zero order filter isn't possible, exit function
    bra.w .worder1
    bra.w .worder2
    bra.w .worder3
    bra.w .worder4
    bra.w .worder5
    bra.w .worder6
    bra.w .worder7
|
|
|
|
| last jump table entry coincides with target, so leave it out
|
|
.worder8:
    /* Wide order 8 filter. This code has to stay directly behind the
       dispatch table: the table carries no entry for order 8 and the
       computed jump lands on this address instead.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d5-%d7/%a1-%a5             | eight coefs: d5 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder8_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0       | multiply-accumulate, fetch next sample
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (%a0)+, %a6, %acc0
    mac.l    %a6, %d6, (%a0)+, %a6, %acc0
    mac.l    %a6, %d5, (-7*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction, stored in place
    lea.l    (-6*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0                            | one sample done
    jne      .worder8_loop
    jra      .exit
|
|
|
|
.worder7:
    /* Wide order 7 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d6-%d7/%a1-%a5             | seven coefs: d6 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder7_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (%a0)+, %a6, %acc0
    mac.l    %a6, %d6, (-6*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction, stored in place
    lea.l    (-5*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .worder7_loop
    jra      .exit
|
|
|
|
.worder6:
    /* Wide order 6 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %d7/%a1-%a5                 | six coefs: d7 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder6_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (%a0)+, %a6, %acc0
    mac.l    %a6, %d7, (-5*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction, stored in place
    lea.l    (-4*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .worder6_loop
    jra      .exit
|
|
|
|
.worder5:
    /* Wide order 5 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a1-%a5                     | five coefs: a1 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder5_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (%a0)+, %a6, %acc0
    mac.l    %a6, %a1, (-4*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction, stored in place
    lea.l    (-3*4, %a0), %a0                   | a0 = second sample of the next window
    subq.l   #1, %d0
    jne      .worder5_loop
    jra      .exit
|
|
|
|
.worder4:
    /* Wide order 4 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a2-%a5                     | four coefs: a2 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder4_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (%a0)+, %a6, %acc0
    mac.l    %a6, %a2, (-3*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction, stored in place
    subq.l   #8, %a0                            | back two words: second sample of next window
    subq.l   #1, %d0
    jne      .worder4_loop
    jra      .exit
|
|
|
|
.worder3:
    /* Wide order 3 filter, fully unrolled.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a3-%a5                     | three coefs: a3 = first ... a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder3_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0
    mac.l    %a6, %a4, (%a0)+, %a6, %acc0
    mac.l    %a6, %a3, (-2*4, %a0), %a6, %acc0  | last tap; a6 = first sample of next pass
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction, stored in place
    subq.l   #4, %a0                            | back one word: second sample of next window
    subq.l   #1, %d0
    jne      .worder3_loop
    jra      .exit
|
|
|
|
.worder2:
    /* Wide order 2 filter.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs. */
    movem.l  (%a1), %a4-%a5                     | two coefs: a4 = first, a5 = last
    move.l   (%a0)+, %a6                        | a6 = oldest history sample
.worder2_loop:
    mac.l    %a6, %a5, (%a0)+, %a6, %acc0       | older sample; a6 = newer sample
    mac.l    %a6, %a4, %acc0                    | no reload: a6 is already the next pass' first sample
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %d3, (%a0)                         | residual + prediction; a0 needs no rewind here
    subq.l   #1, %d0
    jne      .worder2_loop
    jra      .exit
|
|
|
|
.worder1:
    /* Wide order 1 filter.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel,
       a0 = history, a1 = coeffs.
       Loop invariant: on entry to each pass a6 holds the previous *output*
       sample and a0 points at the residual to be decoded.

       The mac.l below reloads a6 with the residual at (a0). Without the
       explicit copy of d3 back into a6 at the end of the pass, every pass
       after the first would multiply the coefficient with the previous
       residual instead of the previous decoded sample. */
    move.l   (%a1), %a5                         | a5 = the only coefficient
    move.l   (%a0)+, %a6                        | a6 = sample preceding the first output
1:
    mac.l    %a6, %a5, (%a0), %a6, %acc0        | acc0 = prev * coef; a6 = residual
    move.l   %accext01, %d4                     | top bits of the sum
    movclr.l %acc0, %d3                         | then the bottom 32 bits
    lsr.l    %d1, %d3                           | bottom part: qlevel bits right
    asl.l    %d2, %d4                           | top part: 32 - qlevel bits left
    or.l     %d4, %d3                           | merge both parts into the shifted sum
    add.l    %a6, %d3                           | residual is already in a6
    move.l   %d3, (%a0)+                        | store decoded sample, step to next residual
    move.l   %d3, %a6                           | decoded sample is the history for the next pass
    subq.l   #1, %d0
    jne      1b
    jra      .exit
|
|
|
|
.wdefault:
    /* we do the filtering in an unrolled by 4 loop as far as we can, and then
       do the rest by jump table. */
    /* Generic wide path for pred_order above 8. The whole block runs once
       per output sample (the final branch loops back to .wdefault), so the
       working pointers are rebuilt for every sample.
       d0 = samples left, d1 = qlevel, d2 = 32 - qlevel, d3 = pred_order,
       a0 = start of current history window, a1 = coeffs,
       a2 = coef pointer (walks backwards), a3 = history pointer (walks
       forwards), a5 = history sample currently being multiplied. */
    lea.l (%a1, %d3.l*4), %a2           | need to start in the other end of coefs
    move.l %a0, %a3                     | working copy of history pointer
    move.l %d3, %d4
    lsr.l #2, %d4                       | coefs/4, num of iterations needed in next loop
    move.l (%a3)+, %a5                  | preload data for loop
1:
    lea.l (-4*4, %a2), %a2              | move lpc coef pointer four samples backwards
    movem.l (%a2), %d5-%d7/%a4          | load four coefs
    mac.l %a5, %a4, (%a3)+, %a5, %acc0
    mac.l %a5, %d7, (%a3)+, %a5, %acc0
    mac.l %a5, %d6, (%a3)+, %a5, %acc0
    mac.l %a5, %d5, (%a3)+, %a5, %acc0
    subq.l #1, %d4                      | any more unrolled loop operations left?
    jne 1b

    moveq.l #3, %d4                     | mask 0x00000003
    and.l %d3, %d4                      | get the remaining samples to be filtered
    jmp.l (2, %pc, %d4*2)               | then jump into mac.l chain
|   jumptable: one bra.b per remainder value, indexed with a scale of 2.
|   A remainder of three has no entry and lands on the code right behind
|   the table, so nothing may be inserted in between.
    bra.b 3f                            | none left
    bra.b 2f                            | one left
    bra.b 1f                            | two left
|   three left
    move.l -(%a2), %d4
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
1:
    move.l -(%a2), %d4
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
2:
    move.l -(%a2), %d4
    mac.l %a5, %d4, (%a3)+, %a5, %acc0
3:
    move.l %accext01, %d5               | get the extension (top) bits of result
    movclr.l %acc0, %d4                 | get low 32 bits of result
    lsr.l %d1, %d4                      | shift qlevel bits right
    asl.l %d2, %d5                      | shift 32 - qlevel bits left
    or.l %d5, %d4                       | combine top and low bits after shift
    add.l %a5, %d4                      | add residual, which is in a5 by now
    move.l %d4, -(%a3)                  | save, a3 is also one past save location
    addq.l #4, %a0                      | increment history pointer
    subq.l #1, %d0                      | decrement sample count
    jne .wdefault                       | are we done?
|
|
| if so, fall through to exit
|
|
|
|
.exit:
    /* Common epilogue for lpc_decode_emac and lpc_decode_emac_wide: undo
       the register save done at either entry point and return. */
    movem.l (%sp), %d2-%d7/%a2-%a6      | restore the 11 saved registers
    lea.l (44, %sp), %sp                | release the 44-byte save area
    rts
|