x1000: optimize crt0.S, improve correctness
Replace inline section copy/fill loops with subroutines, which reduces code size a bit and handles zero-size copies properly. Remove the cache initialization loop as well. There's no actual reason for that loop, because the SPL initializes the caches, and just dropping the cache can even be harmful (in this case it wasn't, because the SPL flushes the whole cache right before calling in). Change-Id: I7cddc9ed6d060b1f1bdd75544297883d014cad2d
This commit is contained in:
parent
0df71c952c
commit
c676736792
1 changed files with 43 additions and 58 deletions
|
@ -49,75 +49,38 @@ _header:
|
|||
.ascii "ENDH" /* end of header structure */
|
||||
|
||||
_realstart:
|
||||
/* Cache init */
|
||||
li v0, 0x80000000
|
||||
ori v1, v0, 0x4000
|
||||
mtc0 zero, C0_TAGLO
|
||||
mtc0 zero, C0_TAGHI
|
||||
_cache_loop:
|
||||
cache ICIndexStTag, 0(v0)
|
||||
cache DCIndexStTag, 0(v0)
|
||||
addiu v0, v0, 32
|
||||
bne v0, v1, _cache_loop
|
||||
nop
|
||||
|
||||
/* Invalidate BTB */
|
||||
mfc0 v0, C0_Config, 7
|
||||
nop
|
||||
ori v0, v0, 2
|
||||
mtc0 v0, C0_Config, 7
|
||||
nop
|
||||
|
||||
/* Copy IRAM from BSS to low memory. */
|
||||
la t0, _iramcopy
|
||||
la t1, _iramstart
|
||||
la t2, _iramend
|
||||
_iram_loop:
|
||||
lw t3, 0(t0)
|
||||
addiu t1, 4
|
||||
addiu t0, 4
|
||||
bne t1, t2, _iram_loop
|
||||
sw t3, -4(t1)
|
||||
la a0, _iramcopy
|
||||
la a1, _iramstart
|
||||
la a2, _iramend
|
||||
bal _copy
|
||||
nop
|
||||
|
||||
#if 0
|
||||
/* Copy TCSM from BSS */
|
||||
la t0, _tcsmcopy
|
||||
la t1, _tcsmstart
|
||||
la t2, _tcsmend
|
||||
_tcsm_loop:
|
||||
lw t3, 0(t0)
|
||||
addiu t0, 4
|
||||
sw t3, 0(t1)
|
||||
bne t1, t2, _tcsm_loop
|
||||
addiu t1, 4
|
||||
#endif
|
||||
la a0, _tcsmcopy
|
||||
la a1, _tcsmstart
|
||||
la a2, _tcsmend
|
||||
bal _copy
|
||||
nop
|
||||
|
||||
/* Clear the BSS segment (needed to zero-initialize C static values) */
|
||||
la t0, _bssbegin
|
||||
la t1, _bssend
|
||||
beq t0, t1, _bss_done
|
||||
_bss_loop:
|
||||
addiu t0, 4
|
||||
bne t0, t1, _bss_loop
|
||||
sw zero, -4(t0)
|
||||
_bss_done:
|
||||
la a0, _bssbegin
|
||||
la a1, _bssend
|
||||
bal _clear
|
||||
move a2, $0
|
||||
|
||||
/* Set stack pointer and clear the stack */
|
||||
la sp, stackend
|
||||
la t0, stackbegin
|
||||
li t1, 0xDEADBEEF
|
||||
_stack_loop:
|
||||
addiu t0, 4
|
||||
bne t0, sp, _stack_loop
|
||||
sw t1, -4(t0)
|
||||
la a0, stackbegin
|
||||
li a2, 0xDEADBEEF
|
||||
bal _clear
|
||||
move a1, sp
|
||||
|
||||
/* Clear the IRQ stack */
|
||||
la k0, _irqstackend
|
||||
la t0, _irqstackbegin
|
||||
_irqstack_loop:
|
||||
addiu t0, 4
|
||||
bne t0, k0, _irqstack_loop
|
||||
sw t1, -4(t0)
|
||||
la a0, _irqstackbegin
|
||||
bal _clear
|
||||
move a1, k0
|
||||
|
||||
/* Jump to C code */
|
||||
jal system_early_init
|
||||
|
@ -125,6 +88,28 @@ _irqstack_loop:
|
|||
j main
|
||||
nop
|
||||
|
||||
/*
 * copy(void* src, void* dst, void* dst_end)
 *
 * Copy 32-bit words from src to dst until dst reaches dst_end.
 *
 * In:    a0 = src, a1 = dst, a2 = dst_end
 * Out:   nothing
 * Clobb: t0, a0, a1 (a2 and ra are not written)
 *
 * The end test runs before every word, so dst == dst_end on entry copies
 * nothing. The test is for equality only and dst advances in steps of 4,
 * so dst_end must be reachable from dst in whole words.
 *
 * NOTE(review): the instruction following each branch is relied upon as a
 * branch delay slot; presumably the file is assembled under .set noreorder
 * (directive not visible here) -- confirm before reordering anything.
 */
|
||||
_copy:
|
||||
beq a1, a2, 1f /* dst == dst_end: nothing left to copy */
|
||||
addiu a1, 4 /* follows the beq (delay slot, see note): dst += 4 */
|
||||
lw t0, 0(a0) /* t0 = *src */
|
||||
addiu a0, 4 /* src += 4 */
|
||||
b _copy /* loop back and re-test the end condition */
|
||||
sw t0, -4(a1) /* follows the b (delay slot): store at the pre-increment dst */
|
||||
1:
|
||||
jr ra /* return to caller */
|
||||
nop
|
||||
|
||||
/*
 * clear(void* dst, void* dst_end, int value)
 *
 * Store the 32-bit value into every word from dst until dst reaches dst_end.
 *
 * In:    a0 = dst, a1 = dst_end, a2 = value
 * Out:   nothing
 * Clobb: a0 (a1, a2 and ra are not written, so a caller can issue another
 *        fill with the same value without reloading a2)
 *
 * The end test runs before every word, so dst == dst_end on entry stores
 * nothing. The test is for equality only and dst advances in steps of 4,
 * so dst_end must be reachable from dst in whole words.
 *
 * NOTE(review): the instruction following each branch is relied upon as a
 * branch delay slot; presumably the file is assembled under .set noreorder
 * (directive not visible here) -- confirm before reordering anything.
 */
|
||||
_clear:
|
||||
beq a0, a1, 1f /* dst == dst_end: nothing left to fill */
|
||||
addiu a0, 4 /* follows the beq (delay slot, see note): dst += 4 */
|
||||
b _clear /* loop back and re-test the end condition */
|
||||
sw a2, -4(a0) /* follows the b (delay slot): store at the pre-increment dst */
|
||||
1:
|
||||
jr ra /* return to caller */
|
||||
nop
|
||||
|
||||
/* Exception entry points */
|
||||
.section .vectors.1, "ax", %progbits
|
||||
j tlb_refill_handler
|
||||
|
|
Loading…
Reference in a new issue