x1000: bootloader: fix Linux self-extracting kernel boot

There is a longstanding bug in Linux with self-extracting kernels on
MIPS, which has only now happened to manifest on the M3K as a hang on
boot. The fix is applied to both the M3K and the Q1, since they both
use this type of kernel image.

Change-Id: I17d2bad6eebd677cd6d2e0bf146450c71fcf1229
This commit is contained in:
Aidan MacDonald 2022-03-16 15:39:18 +00:00
parent 44fbb1a593
commit 6a6c6083fa
4 changed files with 143 additions and 3 deletions

View file

@ -148,6 +148,35 @@ void boot_linux(void)
* Be careful when modifying this code.
*/
#if defined(FIIO_M3K) || defined(SHANLING_Q1)
/* Address of the real kernel entry point. It is read by kernel_thunk()
 * when the patched decompressor stub calls into it. */
uint32_t saved_kernel_entry __attribute__((section(".idata")));

/* Trampoline that the patched decompressor stub calls instead of jumping
 * straight to the kernel. It performs the cache flush and then enters the
 * kernel at saved_kernel_entry, forwarding the four argument registers
 * a0..a3 unchanged. */
void kernel_thunk(long, long, long, long) __attribute__((section(".icode")));
void kernel_thunk(long a0, long a1, long a2, long a3)
{
    typedef void (*kernel_entry_t)(long, long, long, long);

    /* Flush the caches before any kernel code is executed */
    commit_discard_idcache();

    /* Enter the kernel with the arguments the stub handed to us */
    ((kernel_entry_t)saved_kernel_entry)(a0, a1, a2, a3);

    /* Spin forever should the entry point ever return */
    for(;;);
}
/* Overwrite the four instruction words at patch_addr with a call to
 * kernel_thunk():
 *
 *     lui  t9, %hi(kernel_thunk)
 *     ori  t9, t9, %lo(kernel_thunk)
 *     jalr t9
 *     nop                              (branch delay slot)
 *
 * patch_addr must have room for four 32-bit instructions. */
static void patch_stub_call(void* patch_addr)
{
    const uint32_t thunk = (uint32_t)(void*)kernel_thunk;
    const uint32_t thunk_hi = thunk >> 16;
    const uint32_t thunk_lo = thunk & 0xffff;
    uint32_t* insn = patch_addr;

    *insn++ = 0x3c190000 | thunk_hi; /* lui t9, thunk_hi */
    *insn++ = 0x37390000 | thunk_lo; /* ori t9, t9, thunk_lo */
    *insn++ = 0x0320f809;            /* jalr t9 */
    *insn   = 0x00000000;            /* nop */
}
#endif
static __attribute__((unused))
void boot_of_helper(uint32_t addr, uint32_t flash_size, const char* args)
{
@ -157,6 +186,19 @@ void boot_of_helper(uint32_t addr, uint32_t flash_size, const char* args)
if(handle < 0)
return;
#if defined(FIIO_M3K) || defined(SHANLING_Q1)
/* Fix for targets that use self-extracting kernel images */
void* jump_addr = core_get_data(handle);
uint32_t entry_addr = mips_linux_stub_get_entry(&jump_addr, img_length);
if(entry_addr >= 0xa0000000 || entry_addr < 0x80000000) {
splash2(5*HZ, "Kernel patch failed", "Please send bugreport");
return;
}
saved_kernel_entry = entry_addr;
patch_stub_call(jump_addr);
#endif
gui_shutdown();
x1000_dualboot_load_pdma_fw();

View file

@ -41,13 +41,12 @@ struct uimage_header;
# define BL_SELECT_NAME "PLAY"
# define BL_QUIT_NAME "POWER"
# define BOOTBACKUP_FILE "/fiiom3k-boot.bin"
// FIXME: OF kernel hangs on the m3k
//# define OF_PLAYER_NAME "FiiO player"
# define OF_PLAYER_NAME "FiiO player"
# define OF_PLAYER_ADDR 0x20000
# define OF_PLAYER_LENGTH (4 * 1024 * 1024)
# define OF_PLAYER_ARGS OF_RECOVERY_ARGS \
" init=/linuxrc ubi.mtd=3 root=ubi0:rootfs ubi.mtd=4 rootfstype=ubifs rw loglevel=8"
//# define OF_RECOVERY_NAME "FiiO recovery"
# define OF_RECOVERY_NAME "FiiO recovery"
# define OF_RECOVERY_ADDR 0x420000
# define OF_RECOVERY_LENGTH (5 * 1024 * 1024)
# define OF_RECOVERY_ARGS \

View file

@ -186,4 +186,7 @@ int uimage_load(struct uimage_header* uh, size_t* out_size,
*/
ssize_t uimage_fd_reader(void* buf, size_t size, void* ctx);
/* Helper for patching broken self-extracting kernels on MIPS.
 *
 * Scans the beginning of the decompressor stub (at most the first 256 bytes
 * of the code_size bytes at *code_start) for the "move a0..a3, s0..s3"
 * sequence followed by a jr/jalr, and tries to decode the address loaded
 * into the jump register.
 *
 * On success, *code_start is updated to point at the instruction following
 * the move sequence, where at least 4 instruction words up to and including
 * the jump's delay slot are assumed safe to overwrite, and the decoded
 * address is returned. If the pattern is not found 0 is returned and
 * *code_start is left untouched. The decoded address is a best-effort guess
 * and should be sanity-checked by the caller before use. */
uint32_t mips_linux_stub_get_entry(void** code_start, size_t code_size);
#endif /* __LINUXBOOT_H__ */

View file

@ -216,3 +216,99 @@ ssize_t uimage_fd_reader(void* buf, size_t size, void* ctx)
int fd = (intptr_t)ctx;
return read(fd, buf, size);
}
/* Linux's self-extracting kernels are broken on MIPS. The decompressor stub
 * doesn't flush caches after extracting the kernel code which can cause the
 * boot to fail horribly. This has been true since at least 2009 and at the
 * time of writing (2022) it's *still* broken.
 *
 * The FiiO M3K and Shanling Q1 both have broken kernels of this type, so we
 * work around this by replacing the direct call to the kernel entry point with
 * a thunk that adds the necessary cache flush.
 *
 * This function finds the stub's jump to the kernel and decodes its target.
 *
 *  code_start  in:  points to the first instruction of the stub.
 *              out: on success, points to the instruction following the
 *                   "move aN, sN" sequence, where a 4-instruction call can
 *                   be patched in. Left untouched on failure.
 *  code_size   size of the code in bytes; at most the first 256 bytes are
 *              scanned.
 *
 * Returns the decoded kernel entry address, or 0 if the expected code
 * pattern could not be found. The decoded value is only a best guess, so
 * the caller should sanity check it before using it.
 */
uint32_t mips_linux_stub_get_entry(void** code_start, size_t code_size)
{
    /* The jump to the kernel entry point looks like this:
     *
     *    move a0, s0
     *    move a1, s1
     *    move a2, s2
     *    move a3, s3
     *    ...
     *    la k0, KERNEL_ENTRY
     *    jr k0
     *    --- or in kernels since 2021: ---
     *    la t9, KERNEL_ENTRY
     *    jalr t9
     *
     * We're trying to identify this code and decode the kernel entry
     * point address, and return a suitable address where we can patch
     * in a call to our thunk.
     */

    /* We should only need to scan within the first 128 bytes
     * but do up to 256 just in case. */
    const size_t scan_size = code_size < 256 ? code_size : 256;
    uint32_t* start = *code_start;
    uint32_t* end = start + (scan_size + 3) / 4;

    /* The scan below looks at the 4 words preceding "move_instr", so bail
     * out early if the buffer cannot even hold the move sequence. Without
     * this, "start + 4" would already be past "end" and the loops below
     * would run off the end of the buffer. */
    if(end - start < 4)
        return 0;

    /* Scan for the "move aN, sN" sequence */
    uint32_t* move_instr = start + 4;
    for(; move_instr < end; ++move_instr) {
        if(move_instr[-4] == 0x02002021 && /* move a0, s0 */
           move_instr[-3] == 0x02202821 && /* move a1, s1 */
           move_instr[-2] == 0x02403021 && /* move a2, s2 */
           move_instr[-1] == 0x02603821)   /* move a3, s3 */
            break;
    }

    if(move_instr >= end)
        return 0;

    /* Now search forward for the next jr/jalr instruction */
    uint32_t jreg = 0;
    uint32_t* jump_instr = move_instr;
    for(; jump_instr < end; ++jump_instr) {
        if((jump_instr[0] & 0xfc1ff83f) == 0xf809 || /* jalr rN */
           (jump_instr[0] & 0xfc00003f) == 0x8) {    /* jr rN */
            jreg = (jump_instr[0] >> 21) & 0x1f;
            break;
        }
    }

    /* Need room here for 4 instructions. Assume everything between the
     * moves and the jump is safe to overwrite; otherwise, we'll need to
     * take a different approach.
     *
     * Count +1 instruction for the branch delay slot and another +1 because
     * "move_instr" points to the instruction following the last move. */
    if(jump_instr - move_instr + 2 < 4)
        return 0;

    /* No jump found (or a jump through $zero, which is useless to us) */
    if(!jreg)
        return 0;

    /* Now scan from the end of the move sequence until the jump instruction
     * and try to reconstruct the entry address. We check for lui/ori/addiu. */
    const uint32_t lui_mask = 0xffff0000;
    const uint32_t lui = 0x3c000000 | (jreg << 16);
    const uint32_t ori_mask = 0xffff0000;
    const uint32_t ori = 0x34000000 | (jreg << 21) | (jreg << 16);
    const uint32_t addiu_mask = 0xffff0000;
    const uint32_t addiu = 0x24000000 | (jreg << 21) | (jreg << 16);

    /* Can use any initial value here */
    uint32_t jreg_val = 0xdeadbeef;

    for(uint32_t* instr = move_instr; instr != jump_instr; ++instr) {
        const uint32_t imm = instr[0] & 0xffff;

        if((instr[0] & lui_mask) == lui) {
            jreg_val = imm << 16;
        } else if((instr[0] & ori_mask) == ori) {
            /* ori zero-extends its immediate */
            jreg_val |= imm;
        } else if((instr[0] & addiu_mask) == addiu) {
            /* addiu sign-extends its immediate: a lui/addiu pair encodes
             * a low half >= 0x8000 as (hi + 1, lo - 0x10000), so adding
             * the raw 16 bits would be off by 0x10000. */
            jreg_val += (imm ^ 0x8000) - 0x8000;
        }
    }

    /* Success! Probably! */
    *code_start = move_instr;
    return jreg_val;
}