x1000: optimize crt0.S, improve correctness

Replace inline section copy/fill loops with subroutines, which
reduces code size a bit and handles zero-size copies properly.

Remove the cache initialization loop as well. There's no actual
reason for the loop, because the SPL already initializes the caches,
and just dropping the cache contents can even be harmful (in this case
it wasn't, because the SPL flushes the whole cache right before calling in).

Change-Id: I7cddc9ed6d060b1f1bdd75544297883d014cad2d
This commit is contained in:
Aidan MacDonald 2022-03-16 11:48:12 +00:00
parent 0df71c952c
commit c676736792

View file

@ -49,75 +49,38 @@ _header:
.ascii "ENDH" /* end of header structure */
/* Cache init */
li v0, 0x80000000
ori v1, v0, 0x4000
mtc0 zero, C0_TAGLO
mtc0 zero, C0_TAGHI
cache ICIndexStTag, 0(v0)
cache DCIndexStTag, 0(v0)
addiu v0, v0, 32
bne v0, v1, _cache_loop
/* Invalidate BTB */
mfc0 v0, C0_Config, 7
ori v0, v0, 2
mtc0 v0, C0_Config, 7
/* Copy IRAM from BSS to low memory. */
la t0, _iramcopy
la t1, _iramstart
la t2, _iramend
lw t3, 0(t0)
addiu t1, 4
addiu t0, 4
bne t1, t2, _iram_loop
sw t3, -4(t1)
la a0, _iramcopy
la a1, _iramstart
la a2, _iramend
bal _copy
#if 0
/* Copy TCSM from BSS */
la t0, _tcsmcopy
la t1, _tcsmstart
la t2, _tcsmend
lw t3, 0(t0)
addiu t0, 4
sw t3, 0(t1)
bne t1, t2, _tcsm_loop
addiu t1, 4
la a0, _tcsmcopy
la a1, _tcsmstart
la a2, _tcsmend
bal _copy
/* Clear the BSS segment (needed to zero-initialize C static values) */
la t0, _bssbegin
la t1, _bssend
beq t0, t1, _bss_done
addiu t0, 4
bne t0, t1, _bss_loop
sw zero, -4(t0)
la a0, _bssbegin
la a1, _bssend
bal _clear
move a2, $0
/* Set stack pointer and clear the stack */
la sp, stackend
la t0, stackbegin
li t1, 0xDEADBEEF
addiu t0, 4
bne t0, sp, _stack_loop
sw t1, -4(t0)
la a0, stackbegin
li a2, 0xDEADBEEF
bal _clear
move a1, sp
/* Clear the IRQ stack */
la k0, _irqstackend
la t0, _irqstackbegin
addiu t0, 4
bne t0, k0, _irqstack_loop
sw t1, -4(t0)
la a0, _irqstackbegin
bal _clear
move a1, k0
/* Jump to C code */
jal system_early_init
@ -125,6 +88,28 @@ _irqstack_loop:
j main
/* copy(void* src, void* dst, void* dst_end) */
/*
 * Word-wise copy. Register contract as used by the code below:
 *   a0 = src      (read pointer, advanced by 4 per word)
 *   a1 = dst      (write pointer, advanced by 4 per word)
 *   a2 = dst_end  (copy stops when a1 compares equal to a2)
 *   t0 = scratch  (holds the word in flight)
 * a0, a1 and t0 are modified. When dst == dst_end on entry the initial
 * beq is taken, so no word is loaded or stored.
 * The exit test is equality only, so dst_end - dst is presumably a
 * multiple of 4 -- verify against the linker script symbols.
 * NOTE(review): the instruction order (an instruction placed after each
 * branch) relies on MIPS branch delay slots, i.e. assembly under
 * ".set noreorder" -- that directive is not visible in this excerpt.
 * NOTE(review): the "_copy:" and "1:" label lines are not visible in this
 * excerpt; "1f" presumably resolves to the "jr ra" below.
 */
/* Done when the write pointer has reached dst_end. */
beq a1, a2, 1f
/* Advance dst; the store below compensates with a -4 offset. */
addiu a1, 4
/* Fetch the next source word and advance src. */
lw t0, 0(a0)
addiu a0, 4
/* Loop back to the top; the store targets the slot a1 pointed at before
 * it was advanced. */
b _copy
sw t0, -4(a1)
/* Return to the caller. */
jr ra
/* clear(void* dst, void* dst_end, int value) */
/*
 * Word-wise fill. Register contract as used by the code below:
 *   a0 = dst      (write pointer, advanced by 4 per word)
 *   a1 = dst_end  (fill stops when a0 compares equal to a1)
 *   a2 = value    (32-bit word stored to every slot; not modified)
 * Only a0 is modified. When dst == dst_end on entry the initial beq is
 * taken, so nothing is stored.
 * The exit test is equality only, so dst_end - dst is presumably a
 * multiple of 4 -- verify against the linker script symbols.
 * NOTE(review): the instruction order (an instruction placed after each
 * branch) relies on MIPS branch delay slots, i.e. assembly under
 * ".set noreorder" -- that directive is not visible in this excerpt.
 * NOTE(review): the "_clear:" and "1:" label lines are not visible in this
 * excerpt; "1f" presumably resolves to the "jr ra" below.
 */
/* Done when the write pointer has reached dst_end. */
beq a0, a1, 1f
/* Advance dst; the store below compensates with a -4 offset. */
addiu a0, 4
/* Loop back to the top; the store targets the slot a0 pointed at before
 * it was advanced. */
b _clear
sw a2, -4(a0)
/* Return to the caller. */
jr ra
/* Exception entry points */
.section .vectors.1, "ax", %progbits
j tlb_refill_handler