FS#11335 by me: make ARM assembly functions thumb-friendly

We can't pop into pc on ARMv4T when using Thumb: the T bit won't be
modified if we are returning to a Thumb function.
Code running on ARMv4T should use the new ldrpc / ldmpc macros instead
of ldr pc, [sp], #4 and ldm(cond) sp!, {regs, pc}.
No modification on pure ARM builds and ARMv5+.

Note: USE_THUMB is currently never defined, no targets can currently be
built with -mthumb, see FS#6734

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26756 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Rafaël Carré 2010-06-11 04:41:36 +00:00
parent fe7ca44471
commit 45c7498f59
39 changed files with 170 additions and 109 deletions

View file

@ -505,7 +505,11 @@ loop:
done:
str r14, [r12] @ Save value of p->buf
add sp, sp, #12 @ Don't bother restoring r1-r3
#ifdef ROCKBOX
ldmpc regs=r4-r11
#else
ldmia sp!, {r4 - r11, pc}
#endif
move_hist:
@ dest = r11 (p->historybuffer)
@ -664,7 +668,11 @@ loopm:
donem:
str r14, [r12] @ Save value of p->buf
add sp, sp, #8 @ Don't bother restoring r1, r2
#ifdef ROCKBOX
ldmpc regs=r4-r11
#else
ldmia sp!, {r4 - r11, pc}
#endif
move_histm:
@ dest = r11 (p->historybuffer)

View file

@ -127,7 +127,7 @@ mdct_butterfly_16:
@ mdct_butterfly_8 increments r0 by another #8*4 here
@ at end, r0 has been incremented by #16*4
ldr pc, [sp], #4
ldrpc
mdct_butterfly_32:
stmdb sp!, {r4-r11, lr}
@ -257,7 +257,7 @@ mdct_butterfly_32:
@ and we wanted to advance by #16*4 anyway, so just call again
bl mdct_butterfly_16
ldmia sp!, {r4-r11, pc}
ldmpc regs=r4-r11
@ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
mdct_butterfly_generic_loop:
@ -433,5 +433,5 @@ mdct_butterfly_generic_loop:
cmp r2, r4
bhi 1b
ldmia sp!, {r4-r11, pc}
ldmpc regs=r4-r11

View file

@ -19,6 +19,8 @@
*
****************************************************************************/
#include "config.h"
.section .text, "ax", %progbits
/****************************************************************************
@ -63,7 +65,7 @@ atrac3_iqmf_matrixing:
subs r3, r3, #4 /* counter -= 4 */
bgt .iqmf_matrixing_loop
ldmfd sp!, {r4-r9, pc} /* restore registers */
ldmpc regs=r4-r9 /* restore registers */
.atrac3_iqmf_matrixing_end:
.size atrac3_iqmf_matrixing,.atrac3_iqmf_matrixing_end-atrac3_iqmf_matrixing
@ -218,7 +220,7 @@ atrac3_iqmf_dewindowing:
subs r3, r3, #1 /* outer loop -= 1 */
bgt .iqmf_dewindow_outer_loop
ldmfd sp!, {r4-r9, pc} /* restore registers */
ldmpc regs=r4-r9 /* restore registers */
.atrac3_iqmf_dewindowing_end:
.size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing

View file

@ -267,5 +267,5 @@ lpc_decode_arm:
bne .default @ no, prepare for next sample
.exit:
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11

View file

@ -220,7 +220,7 @@ dct32:
cmp r0, #9
bne .l4
add sp, sp, #144
ldmia sp!, {r4-r11, pc}
ldmpc regs=r4-r11
bitrev:
.word 0x0
.word 0x2

View file

@ -45,6 +45,7 @@
*
****************************************************************************/
#include "config.h"
/*
On entry:
@ -823,7 +824,7 @@ normal_block_x18_to_x35:
@----
add sp, sp, #(21*4) @ return stack frame
ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return
ldmpc regs=r4-r11 @ restore callee saved regs, and return
@----
@ -992,7 +993,7 @@ start_block_x18_to_x35:
@----
add sp, sp, #(21*4) @ return stack frame
ldmia sp!, { r4 - r11, pc } @ restore callee saved regs, and return
ldmpc regs=r4-r11 @ restore callee saved regs, and return
@----
@END

View file

@ -19,6 +19,7 @@
*
****************************************************************************/
#include "config.h"
#include "mad_iram.h"
.section ICODE_SECTION_MPA_ARM,"ax",%progbits
@ -135,7 +136,7 @@ synth_full_odd_sbsample:
ldr r5, =synth_full_sp
ldr sp, [r5]
ldmia sp!, {r4-r11, pc}
ldmpc regs=r4-r11
synth_full_even_sbsample:
stmdb sp!, {r4-r11, lr}
@ -241,7 +242,7 @@ synth_full_even_sbsample:
ldr r5, =synth_full_sp
ldr sp, [r5]
ldmia sp!, {r4-r11, pc}
ldmpc regs=r4-r11
.global III_aliasreduce
@ -289,7 +290,7 @@ III_aliasreduce:
add r0, r0, #72
cmp r0, r1
blo .arl1
ldmia sp!, {r4-r11, pc}
ldmpc regs=r4-r11
csa:
.word +0x0db84a81
@ -332,7 +333,7 @@ III_overlap:
stmia r1!, {r4, r5, r6, r7, r12, lr}
ldmia r0!, {r4, r5, r6, r7, r12, lr}
stmia r1!, {r4, r5, r6, r7, r12, lr}
ldmia sp!, {r4-r7, pc}
ldmpc regs=r4-r7
.section IBSS_SECTION_MPA_ARM,"aw",%nobits
synth_full_sp:

View file

@ -19,6 +19,8 @@
*
****************************************************************************/
#include "config.h"
.section .text, "ax", %progbits
/****************************************************************************
@ -89,7 +91,7 @@ mpc_decoder_windowing_D:
subs lr, lr, #1
bgt .loop32
ldmfd sp!, {r4-r8, pc}
ldmpc regs=r4-r8
#else
mpc_decoder_windowing_D:
/* r0 = Data[] */
@ -287,7 +289,7 @@ mpc_decoder_windowing_D:
str r8, [r0], #4 /* store Data */
add r1, r1, #4 /* V++ */
ldmfd sp!, {r4-r11, pc}
ldmpc regs=r4-r11
#endif
.mpc_dewindowing_end:
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D

View file

@ -49,7 +49,7 @@ iir_mem16:
beq .order_10
cmp r5, #8
beq .order_8
ldmia sp!, { r4-r11, pc } @ Non-supported order, return
ldmpc regs=r4-r11 @ Non-supported order, return
@ TODO: try using direct form 1 filtering
.order_8:
@ -94,7 +94,7 @@ iir_mem16:
bne 0b
ldr r4, [sp, #40] @ r4 = mem
stmia r4, { r5-r12 } @ Save back mem[]
ldmia sp!, { r4-r11, pc } @ Exit
ldmpc regs=r4-r11 @ Exit
.order_10:
ldmia r4, { r5-r9 } @ r5-r9 = mem[0..4]
@ -154,7 +154,7 @@ iir_mem16:
sub r1, r1, #10*2
subs r3, r3, #1
bne .order_10
ldmia sp!, { r4-r11, pc } @ Exit
ldmpc regs=r4-r11 @ Exit
/* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */
@ -300,7 +300,7 @@ qmf_synth:
strh r8, [r6], #4
subs r4, r4, #4
bne 0b
ldmia sp!, { r4-r11, pc } @ Exit
ldmpc regs=r4-r11 @ Exit
/* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) */
@ -325,5 +325,5 @@ signal_mul:
subs r3, r3, #4 @ Are we done?
bne 0b
ldmia sp!, { r4-r8, pc } @ Exit
ldmpc regs=r4-r8 @ Exit

View file

@ -167,7 +167,7 @@ hybrid_filter:
@ set to the memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3)
stmneda r2, {r10, r11, r12, lr}
stmneda r3, {r5, r6, r7, r8}
ldmnefd sp!, {r4-r12, pc} @ hybrid_filter end (when fs->index != 0)
ldmpc cond=ne, regs=r4-r12 @ hybrid_filter end (when fs->index != 0)
.hf_memshl:
@ memshl (fs->dl)
@ -192,7 +192,7 @@ hybrid_filter:
ldmia r9, {r1, r2, r3, r4}
sub r9, r9, #64 @ r9 = fs->dx
stmia r9, {r1 - r8}
ldmfd sp!, {r4 - r12, pc} @ hybrid_filter end (when fs->index == 0)
ldmpc regs=r4-r12 @ hybrid_filter end (when fs->index == 0)
hybrid_filter_end:
.size hybrid_filter, hybrid_filter_end - hybrid_filter

View file

@ -35,6 +35,9 @@
* 32-bit multiply-accumulate instruction and so will overflow with 24-bit
* WavPack files.
*/
#include "config.h"
.text
.align
.global decorr_stereo_pass_cont_arm
@ -470,5 +473,5 @@ term_minus_3_loop:
common_exit:
strh r4, [r5, #4]
strh r0, [r5, #6]
ldmfd sp!, {r4 - r8, r10, r11, pc}
ldmpc regs="r4-r8, r10-r11"

View file

@ -38,6 +38,8 @@
* instruction.
*/
#include "config.h"
.text
.align
.global decorr_stereo_pass_cont_arml
@ -500,5 +502,5 @@ common_exit:
mov r4, r4, asr #18
strh r4, [r5, #4]
strh r0, [r5, #6]
ldmfd sp!, {r4 - r8, r10, r11, pc}
ldmpc regs="r4-r8, r10-r11"

View file

@ -23,6 +23,9 @@
/****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[])
*/
#include "config.h"
.section .icode, "ax", %progbits
.align 2
.global channels_process_sound_chan_mono
@ -47,7 +50,7 @@ channels_process_sound_chan_mono:
stmia r2!, { r12, r14 } @ store Mo0, Mo1
bgt .monoloop @
@
ldmltfd sp!, { r4, pc } @ if count was even, we're done
ldmpc cond=lt, regs=r4 @ if count was even, we're done
@
.mono_singlesample: @
ldr r3, [r1] @ r3 = Ls
@ -57,7 +60,7 @@ channels_process_sound_chan_mono:
str r12, [r1] @ store Mo
str r12, [r2] @ store Mo
@
ldmfd sp!, { r4, pc } @
ldmpc regs=r4 @
.size channels_process_sound_chan_mono, \
.-channels_process_sound_chan_mono
@ -112,7 +115,7 @@ channels_process_sound_chan_custom:
bgt .custom_loop
ldmltfd sp!, { r4-r10, pc } @ < 0? even count
ldmpc cond=lt, regs=r4-r10 @ < 0? even count
.custom_single_sample:
ldr r5, [r1] @ handle odd sample
@ -131,7 +134,7 @@ channels_process_sound_chan_custom:
str r5, [r1] @ Store Lc0
str r7, [r2] @ Store Rc0
ldmfd sp!, { r4-r10, pc }
ldmpc regs=r4-r10
.size channels_process_sound_chan_custom, \
.-channels_process_sound_chan_custom
@ -164,7 +167,7 @@ channels_process_sound_chan_karaoke:
stmia r2!, { r12, r14 } @ store Ro0, Ro1
bgt .karaokeloop @
@
ldmltfd sp!, { r4, pc } @ if count was even, we're done
ldmpc cond=lt, regs=r4 @ if count was even, we're done
@
.karaoke_singlesample: @
ldr r3, [r1] @ r3 = Li
@ -175,7 +178,7 @@ channels_process_sound_chan_karaoke:
str r3, [r1] @ store Lo
str r12, [r2] @ store Ro
@
ldmfd sp!, { r4, pc } @
ldmpc regs=r4 @
.size channels_process_sound_chan_karaoke, \
.-channels_process_sound_chan_karaoke
@ -225,7 +228,7 @@ sample_output_mono:
subs r0, r0, #2
bgt .somloop
ldmltfd sp!, { r4-r6, pc } @ even 'count'? return
ldmpc cond=lt, regs=r4-r6 @ even 'count'? return
.som_singlesample:
ldr r5, [r2] @ do odd sample
@ -239,7 +242,7 @@ sample_output_mono:
orr r5, r5, r5, lsl #16
str r5, [r3]
ldmfd sp!, { r4-r6, pc }
ldmpc regs=r4-r6
.size sample_output_mono, .-sample_output_mono
/****************************************************************************
@ -302,7 +305,7 @@ sample_output_stereo:
subs r0, r0, #2
bgt .sosloop
ldmltfd sp!, { r4-r9, pc } @ even 'count'? return
ldmpc cond=lt, regs=r4-r9 @ even 'count'? return
.sos_singlesample:
ldr r6, [r2] @ left odd sample
@ -324,7 +327,7 @@ sample_output_stereo:
str r8, [r3]
ldmfd sp!, { r4-r9, pc }
ldmpc regs=r4-r9
.size sample_output_stereo, .-sample_output_stereo
#endif /* ARM_ARCH < 6 */
@ -387,7 +390,7 @@ apply_crossfeed:
stmia r12, { r8-r11 } @ save filter history
str r0, [r12, #30*4] @ save delay line index
add sp, sp, #8 @ remove temp variables from stack
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11
.size apply_crossfeed, .-apply_crossfeed
/****************************************************************************
@ -444,7 +447,7 @@ dsp_downsample:
ldr r1, [r3] @ r1 = &dst[0]
sub r8, r8, r1 @ dst - &dst[0]
mov r0, r8, lsr #2 @ convert bytes->samples
ldmia sp!, { r4-r11, pc } @ ... and we're out
ldmpc regs=r4-r11 @ ... and we're out
.size dsp_downsample, .-dsp_downsample
/****************************************************************************
@ -507,7 +510,7 @@ dsp_upsample:
sub r8, r8, r2 @ dst - &dst[0]
mov r0, r8, lsr #2 @ convert bytes->samples
add sp, sp, #8 @ adjust stack for temp variables
ldmfd sp!, { r4-r11, pc } @ ... and we're out
ldmpc regs=r4-r11 @ ... and we're out
.size dsp_upsample, .-dsp_upsample
/****************************************************************************
@ -554,5 +557,5 @@ dsp_apply_gain:
subs r3, r3, #1
bgt .dag_outerloop @ end of outer loop
ldmfd sp!, { r4-r8, pc }
ldmpc regs=r4-r8
.size dsp_apply_gain, .-dsp_apply_gain

View file

@ -85,5 +85,5 @@ eq_filter:
bne .filterloop
add sp, sp, #16 @ compensate for temp storage
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11

View file

@ -19,6 +19,8 @@
*
****************************************************************************/
#include "config.h"
.global mpeg2_idct_copy
.type mpeg2_idct_copy, %function
.global mpeg2_idct_add
@ -313,7 +315,7 @@ mpeg2_idct_copy:
add r1, r1, r2
cmp r0, r12
blo 1b
ldmfd sp!, { r4-r11, pc }
ldmpc regs=r4-r11
mpeg2_idct_add:
cmp r0, #129
@ -385,7 +387,7 @@ mpeg2_idct_add:
add r1, r1, r2
cmp r0, r12
blo 2b
ldmfd sp!, { r4-r11, pc }
ldmpc regs=r4-r11
3:
stmfd sp!, { r4-r5, lr }
ldrsh r1, [r0, #0] /* r1 = block[0] */
@ -438,4 +440,4 @@ mpeg2_idct_add:
add r2, r2, r3
cmp r2, r0
blo 4b
ldmfd sp!, { r4-r5, pc }
ldmpc regs=r4-r5

View file

@ -47,7 +47,7 @@ MC_put_o_16_align0:
subs r3, r3, #1
add r0, r0, r2
bne MC_put_o_16_align0
ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
ldmpc regs=r4-r7 @@ update PC with LR content.
.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
mov \R0, \R0, lsr #(\shift)
@ -71,7 +71,7 @@ MC_put_o_16_align1:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
ldmpc regs=r4-r7 @@ update PC with LR content.
MC_put_o_16_align2:
and r1, r1, #0xFFFFFFFC
@ -83,7 +83,7 @@ MC_put_o_16_align2:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
ldmpc regs=r4-r7 @@ update PC with LR content.
MC_put_o_16_align3:
and r1, r1, #0xFFFFFFFC
@ -95,7 +95,7 @@ MC_put_o_16_align3:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r7, pc} @@ update PC with LR content.
ldmpc regs=r4-r7 @@ update PC with LR content.
@ ----------------------------------------------------------------
.align
@ -120,7 +120,7 @@ MC_put_o_8_align0:
add r0, r0, r2
subs r3, r3, #1
bne MC_put_o_8_align0
ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
ldmpc regs=r4-r5 @@ update PC with LR content.
.macro ADJ_ALIGN_DW shift, R0, R1, R2
mov \R0, \R0, lsr #(\shift)
@ -140,7 +140,7 @@ MC_put_o_8_align1:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
ldmpc regs=r4-r5 @@ update PC with LR content.
MC_put_o_8_align2:
and r1, r1, #0xFFFFFFFC
@ -152,7 +152,7 @@ MC_put_o_8_align2:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
ldmpc regs=r4-r5 @@ update PC with LR content.
MC_put_o_8_align3:
and r1, r1, #0xFFFFFFFC
@ -164,7 +164,7 @@ MC_put_o_8_align3:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4, r5, pc} @@ update PC with LR content.
ldmpc regs=r4-r5 @@ update PC with LR content.
@ ----------------------------------------------------------------
.macro AVG_PW rW1, rW2
@ -218,7 +218,7 @@ MC_put_x_16_align0:
subs r3, r3, #1
add r0, r0, r2
bne MC_put_x_16_align0
ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r8, HIGH_REGS" @@ update PC with LR content.
MC_put_x_16_align1:
and r1, r1, #0xFFFFFFFC
@ -234,7 +234,7 @@ MC_put_x_16_align1:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r8, HIGH_REGS" @@ update PC with LR content.
MC_put_x_16_align2:
and r1, r1, #0xFFFFFFFC
@ -250,7 +250,7 @@ MC_put_x_16_align2:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r8, HIGH_REGS" @@ update PC with LR content.
MC_put_x_16_align3:
and r1, r1, #0xFFFFFFFC
@ -266,7 +266,7 @@ MC_put_x_16_align3:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r8, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r8, HIGH_REGS" @@ update PC with LR content.
@ ----------------------------------------------------------------
.align
@ -297,7 +297,7 @@ MC_put_x_8_align0:
subs r3, r3, #1
add r0, r0, r2
bne MC_put_x_8_align0
ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r6, HIGH_REGS" @@ update PC with LR content.
MC_put_x_8_align1:
and r1, r1, #0xFFFFFFFC
@ -311,7 +311,7 @@ MC_put_x_8_align1:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r6, HIGH_REGS" @@ update PC with LR content.
MC_put_x_8_align2:
and r1, r1, #0xFFFFFFFC
@ -325,7 +325,7 @@ MC_put_x_8_align2:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r6, HIGH_REGS" @@ update PC with LR content.
MC_put_x_8_align3:
and r1, r1, #0xFFFFFFFC
@ -339,4 +339,4 @@ MC_put_x_8_align3:
subs r3, r3, #1
add r0, r0, r2
bne 1b
ldmfd sp!, {r4-r6, HIGH_REGS, pc} @@ update PC with LR content.
ldmpc regs="r4-r6, HIGH_REGS" @@ update PC with LR content.

View file

@ -19,6 +19,7 @@
*
****************************************************************************/
#include "config.h"
#include "pacbox.h"
.section .icode,"ax",%progbits
@ -120,7 +121,7 @@ loop_x:
/* end of y loop */
add r1, r1, #224*3 @ vbuf += 224*3
subs lr, lr, #4 @ y-=4
ldmeqia sp!, {r4-r11, pc}
ldmpc cond=eq, regs=r4-r11
b loop_y
#endif
#endif

View file

@ -89,7 +89,7 @@ jpeg_idct2v:
add r0, r0, #4
cmp r0, r1
bcc 1b
ldmia sp!, { r4, pc }
ldmpc regs=r4
#else
/* ARMv6 offers partitioned adds and subtracts, used here to unroll the loop
to two columns.
@ -137,7 +137,7 @@ jpeg_idct2h:
add r1, r1, r3
cmp r0, r2
bcc 1b
ldmia sp!, { r4-r5, pc }
ldmpc regs=r4-r5
#else
stmdb sp!, { r4, lr }
ldrsh r14, .Lpool4+2
@ -190,7 +190,7 @@ jpeg_idct4v:
add r0, r0, #2
cmp r0, r1
bcc 1b
ldmia sp!, { r4-r7, pc }
ldmpc regs=r4-r7
#elif ARM_ARCH < 6
stmdb sp!, { r4-r8, lr }
mov r8, #1024
@ -221,7 +221,7 @@ jpeg_idct4v:
cmp r0, r1
bcc 1b
ldmia sp!, { r4-r8, pc }
#else
#else /* ARMv6+ */
stmdb sp!, { r4-r10, lr }
ldrd r2, .Lpool4
mov r12, #1024
@ -325,8 +325,8 @@ jpeg_idct4h:
add r1, r1, r3
cmp r0, r2
bcc 1b
ldmia sp!, { r4-r10, pc }
#elif ARM_ARCH < 6
ldmpc regs=r4-r10
#elif ARM_ARCH < 6 /* ARMv5 */
stmdb sp!, { r4-r9, lr }
ldr r4, .Lpool4
ldr r5, .Lpool4+4
@ -367,7 +367,7 @@ jpeg_idct4h:
cmp r0, r2
bcc 1b
ldmia sp!, { r4-r9, pc }
#else
#else /* ARMv6+ */
stmdb sp!, { r4-r9, lr }
ldrd r4, .Lpool4
mov r9, r4, lsr #16
@ -424,7 +424,7 @@ jpeg_idct8v:
cmp r0, r1
add r2, r2, #2
bcc 1b
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11
2:
ldr r14, =4433
ldr r12, =-15137
@ -586,7 +586,7 @@ jpeg_idct8v:
cmp r0, r1
add r2, r2, #2
bcc 1b
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11
.size jpeg_idct8v, .-jpeg_idct8v
#if ARM_ARCH > 4
@ -631,7 +631,7 @@ jpeg_idct8h:
add r1, r1, r3
cmp r0, r2
bcc 1b
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11
2:
ldr r14, =4433
ldr r12, =-15137
@ -826,9 +826,9 @@ jpeg_idct8h:
add r1, r1, r3
cmp r0, r2
bcc 1b
ldmia sp!, { r4-r11, pc }
ldmpc regs=r4-r11
.size jpeg_idct8h, .-jpeg_idct8h
#else
#else /* ARMv6+ */
jpeg_idct8v:
stmdb sp!, { r4-r11, lr }
add r2, r0, #128

View file

@ -710,6 +710,26 @@ Lyre prototype 1 */
#define ROCKBOX_STRICT_ALIGN 1
#endif
#if defined(CPU_ARM) && defined(__ASSEMBLER__)
/* ARMv4T doesn't switch the T bit when popping pc directly, we must use BX */
/* ldmpc: restore a register list plus the return address from the stack
 * (sp is written back) and return to the caller.
 *   cond  - optional condition suffix, applied to the load and, in the
 *           ARMv4T/Thumb variant, to the bx as well (default: none)
 *   order - ldm addressing-mode suffix (default: "ia")
 *   regs  - required; registers to restore, written without braces and
 *           without pc/lr -- the macro appends the last register itself
 * With ARM_ARCH == 4 and USE_THUMB defined the return address is loaded
 * into lr and followed by bx lr; otherwise it is loaded directly into pc. */
.macro ldmpc cond="", order="ia", regs:req
#if ARM_ARCH == 4 && defined(USE_THUMB)
ldm\cond\order sp!, { \regs, lr }
bx\cond lr
#else
ldm\cond\order sp!, { \regs, pc }
#endif
.endm
/* ldrpc: load the single word at [sp] as the return address, post-increment
 * sp by 4, and return to the caller.
 *   cond  - optional condition suffix (default: none)
 * Uses lr + bx when ARM_ARCH == 4 and USE_THUMB is defined, otherwise loads
 * straight into pc. */
.macro ldrpc cond=""
#if ARM_ARCH == 4 && defined(USE_THUMB)
ldr\cond lr, [sp], #4
bx\cond lr
#else
ldr\cond pc, [sp], #4
#endif
.endm
#endif
#ifndef CODEC_SIZE
#define CODEC_SIZE 0
#endif

View file

@ -241,7 +241,7 @@ lcd_write_yuv420_lines:
tst r7, #DBOP_BUSY @ fifo not empty?
beq 1b @
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -545,6 +545,6 @@ lcd_write_yuv420_lines_odither:
tst r7, #DBOP_BUSY @ fifo not empty?
beq 1b @
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -19,6 +19,7 @@
*
****************************************************************************/
#include "config.h"
#include "as3525.h"
.text
@ -90,5 +91,5 @@ lcd_grey_data:
ands r5, r5, #(1<<10) @ wait until push fifo empties
beq 1b
ldmfd sp!, {r4-r7, pc}
ldmpc regs=r4-r7
.size lcd_grey_data,.-lcd_grey_data

View file

@ -146,7 +146,7 @@ copy_read_sectors:
strb r3, [r0], #1 /* store final byte */
ldmfd sp!, {r4, r5, pc}
ldmpc regs=r4-r5
/* 16-bit aligned */
.r_aligned:
@ -195,7 +195,7 @@ copy_read_sectors:
ldrneh r3, [r2]
strneh r3, [r0], #2
ldmfd sp!, {r4, r5, pc}
ldmpc regs=r4-r5
.r_end:
.size copy_read_sectors,.r_end-copy_read_sectors
@ -300,7 +300,7 @@ copy_write_sectors:
orr r3, r3, r4, lsl #8
strh r3, [r2] /* write final halfword */
ldmfd sp!, {r4, r5, pc}
ldmpc regs=r4-r5
/* 16-bit aligned */
.w_aligned:
@ -349,7 +349,7 @@ copy_write_sectors:
ldrneh r3, [r0], #2
strneh r3, [r2]
ldmfd sp!, {r4, r5, pc}
ldmpc regs=r4-r5
.w_end:
.size copy_write_sectors,.w_end-copy_write_sectors

View file

@ -97,7 +97,7 @@ lcd_write_data_shifted:
subs r1, r1, #1
bne .sloop
ldmfd sp!, {r4, pc}
ldmpc regs=r4
.size lcd_write_data_shifted,.-lcd_write_data_shifted
#elif defined IPOD_MINI
@ -132,7 +132,7 @@ lcd_write_data_shifted:
subs r1, r1, #1
bne .sloop
ldr pc, [sp], #4
ldrpc
.size lcd_write_data_shifted,.-lcd_write_data_shifted
#endif
@ -179,7 +179,7 @@ lcd_mono_data:
subs r1, r1, #1
bne .mloop
ldmfd sp!, {r4, pc}
ldmpc regs=r4
.dibits:
.byte 0x00, 0x03, 0x0C, 0x0F, 0x30, 0x33, 0x3C, 0x3F
@ -267,6 +267,6 @@ lcd_grey_data:
subs r2, r2, #1
bne .greyloop
ldmfd sp!, {r4-r7, pc}
ldmpc regs=r4-r7
.size lcd_grey_data,.-lcd_grey_data

View file

@ -19,6 +19,8 @@
*
****************************************************************************/
#include "config.h"
.section .icode, "ax", %progbits
/****************************************************************************
@ -60,7 +62,7 @@ lcd_write_data: /* r1 = pixel count, must be even */
ldrne r3, [r0], #4
strne r3, [lr]
ldmfd sp!, {r4, pc}
ldmpc regs=r4
/****************************************************************************
* extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
@ -294,7 +296,7 @@ lcd_write_yuv420_lines:
ldr r3, [sp, #12]
add sp, sp, r3 /* deallocate buffer */
ldmfd sp!, { r4-r10, pc } /* restore registers */
ldmpc regs=r4-r10 /* restore registers */
.ltorg
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines

View file

@ -232,7 +232,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -533,6 +533,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -101,7 +101,7 @@ lcd_copy_buffer_rect: @
add r0, r0, r4, lsl #1 @
subs r3, r3, #1 @ next line
bgt 10b @ copy line @
ldmfd sp!, { r4-r11, pc } @ restore regs and return
ldmpc regs=r4-r11 @ restore regs and return
.ltorg @ dump constant pool
.size lcd_copy_buffer_rect, .-lcd_copy_buffer_rect
@ -344,7 +344,7 @@ lcd_write_yuv420_lines:
subs r2, r2, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r10, pc } @ restore registers and return
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -691,6 +691,6 @@ lcd_write_yuv420_lines_odither:
subs r2, r2, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -98,7 +98,7 @@ memcpy:
strcsb r4, [r0], #1
strcsb ip, [r0]
ldmfd sp!, {r0, r4, pc}
ldmpc regs="r0, r4"
9: rsb ip, ip, #4
cmp ip, #2

View file

@ -112,7 +112,7 @@ memmove:
strneb r3, [r0, #-1]!
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldmfd sp!, {r0, r4, pc}
ldmpc regs="r0, r4"
9: cmp ip, #2
ldrgtb r3, [r1, #-1]!

View file

@ -69,7 +69,7 @@ memset:
stmgedb r0!, {r1, r3, ip, lr}
stmgedb r0!, {r1, r3, ip, lr}
bgt 3b
ldreq pc, [sp], #4 @ Now <64 bytes to go.
ldrpc cond=eq @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/

View file

@ -59,7 +59,7 @@ memset16:
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
bgt 2b
ldreq pc, [sp], #4 @ Now <64 bytes to go.
ldrpc cond=eq @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/

View file

@ -99,6 +99,6 @@ lcd_grey_data:
subs r2, r2, #1
bne .greyloop
ldmfd sp!, {r4-r7, pc}
ldmpc regs=r4-r7
.size lcd_grey_data,.-lcd_grey_data

View file

@ -243,7 +243,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -551,6 +551,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -74,7 +74,12 @@ void fiq_handler(void) ICODE_ATTR __attribute__((naked));
void fiq_handler(void)
{
asm volatile (
#if ARM_ARCH == 4 && defined(USE_THUMB)
/* In ARM state pc reads as the address of the current instruction + 8.
 * Because the bx below sits between this load and the fiq_function word,
 * the literal is at exactly pc + 0 (pc - 4 would fetch the bx opcode). */
"ldr r12, [pc] \n"
"bx r12 \n"
#else
/* pc reads as this instruction + 8, so pc - 4 is the word that follows. */
"ldr pc, [pc, #-4] \n"
#endif
"fiq_function: \n"
".word 0 \n"
);

View file

@ -248,7 +248,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -565,6 +565,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -238,7 +238,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r10, pc } @ restore registers and return
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -545,6 +545,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -113,6 +113,6 @@ lcd_grey_data:
subs r2, r2, #1
bne .greyloop
ldmfd sp!, {r4-r5, pc}
ldmpc regs=r4-r5
.size lcd_grey_data,.-lcd_grey_data

View file

@ -232,7 +232,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -533,6 +533,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -238,7 +238,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r10, pc } @ restore registers and return
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -545,6 +545,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r11, pc } @ restore registers and return
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -77,7 +77,15 @@ static inline void load_context(const void* addr)
asm volatile(
"ldr r0, [%0, #40] \n" /* Load start pointer */
"cmp r0, #0 \n" /* Check for NULL */
"ldmneia %0, { r0, pc } \n" /* If not already running, jump to start */
/* If not already running, jump to start */
#if ARM_ARCH == 4 && defined(USE_THUMB)
"ldmneia %0, { r0, r12 } \n"
"bxne r12 \n"
#else
"ldmneia %0, { r0, pc } \n"
#endif
"ldmia %0, { r4-r11, sp, lr } \n" /* Load regs r4 to r14 from context */
: : "r" (addr) : "r0" /* only! */
);