/*
 * x1000: bootloader: fix Linux self-extracting kernel boot
 *
 * There is a longstanding bug in Linux with self-extracting kernels on MIPS,
 * which showed up on the M3K as a hang on boot. The workaround is applied to
 * the M3K and the Q1 since both use this type of kernel image.
 *
 * Instead of letting the kernel's stub jump straight to the kernel entry
 * point, the stub is patched to call kernel_thunk(), which flushes the caches
 * first and only then jumps to the real entry point.
 */
#if defined(FIIO_M3K) || defined(SHANLING_Q1)
/* Real kernel entry point, filled in before the stub is patched.
 * Placed in the ".idata" section (presumably so it stays reachable once the
 * kernel image is running -- confirm against the linker script). */
uint32_t saved_kernel_entry __attribute__((section(".idata")));

/* The thunk itself is placed in the ".icode" section. */
void kernel_thunk(long, long, long, long) __attribute__((section(".icode")));

/*
 * Called from the patched stub in place of the direct jump to the kernel.
 * The four arguments are forwarded unchanged to the kernel entry point.
 * Never returns.
 */
void kernel_thunk(long a0, long a1, long a2, long a3)
{
    typedef void (*kernel_entry_fn)(long, long, long, long);

    /* The cache flush the stub itself does not perform */
    commit_discard_idcache();

    /* Hand over to the kernel; control is not expected to come back */
    ((kernel_entry_fn)saved_kernel_entry)(a0, a1, a2, a3);

    for(;;)
        ;
}

/*
 * Overwrite four instructions at patch_addr with a call to kernel_thunk():
 *
 *     lui  t9, hi16(kernel_thunk)
 *     ori  t9, t9, lo16(kernel_thunk)
 *     jalr t9
 *     nop                      (branch delay slot)
 *
 * patch_addr must have room for four 32-bit words.
 */
static void patch_stub_call(void* patch_addr)
{
    const uint32_t thunk_addr = (uint32_t)(void*)kernel_thunk;
    uint32_t* insn = patch_addr;

    *insn++ = 0x3c190000 | (thunk_addr >> 16);    /* lui t9, stub_hi */
    *insn++ = 0x37390000 | (thunk_addr & 0xffff); /* ori t9, t9, stub_lo */
    *insn++ = 0x0320f809;                         /* jalr t9 */
    *insn   = 0x00000000;                         /* nop */
}
#endif
uint32_t flash_size, const char* args) if(handle < 0) return; +#if defined(FIIO_M3K) || defined(SHANLING_Q1) + /* Fix for targets that use self-extracting kernel images */ + void* jump_addr = core_get_data(handle); + uint32_t entry_addr = mips_linux_stub_get_entry(&jump_addr, img_length); + if(entry_addr >= 0xa0000000 || entry_addr < 0x80000000) { + splash2(5*HZ, "Kernel patch failed", "Please send bugreport"); + return; + } + + saved_kernel_entry = entry_addr; + patch_stub_call(jump_addr); +#endif + gui_shutdown(); x1000_dualboot_load_pdma_fw(); diff --git a/bootloader/x1000/x1000bootloader.h b/bootloader/x1000/x1000bootloader.h index 587a820eaf..10f6c6e730 100644 --- a/bootloader/x1000/x1000bootloader.h +++ b/bootloader/x1000/x1000bootloader.h @@ -41,13 +41,12 @@ struct uimage_header; # define BL_SELECT_NAME "PLAY" # define BL_QUIT_NAME "POWER" # define BOOTBACKUP_FILE "/fiiom3k-boot.bin" -// FIXME: OF kernel hangs on the m3k -//# define OF_PLAYER_NAME "FiiO player" +# define OF_PLAYER_NAME "FiiO player" # define OF_PLAYER_ADDR 0x20000 # define OF_PLAYER_LENGTH (4 * 1024 * 1024) # define OF_PLAYER_ARGS OF_RECOVERY_ARGS \ " init=/linuxrc ubi.mtd=3 root=ubi0:rootfs ubi.mtd=4 rootfstype=ubifs rw loglevel=8" -//# define OF_RECOVERY_NAME "FiiO recovery" +# define OF_RECOVERY_NAME "FiiO recovery" # define OF_RECOVERY_ADDR 0x420000 # define OF_RECOVERY_LENGTH (5 * 1024 * 1024) # define OF_RECOVERY_ARGS \ diff --git a/firmware/export/linuxboot.h b/firmware/export/linuxboot.h index 7dbc213012..de6f24bf57 100644 --- a/firmware/export/linuxboot.h +++ b/firmware/export/linuxboot.h @@ -186,4 +186,7 @@ int uimage_load(struct uimage_header* uh, size_t* out_size, */ ssize_t uimage_fd_reader(void* buf, size_t size, void* ctx); +/* helper for patching broken self-extracting kernels on MIPS */ +uint32_t mips_linux_stub_get_entry(void** code_start, size_t code_size); + #endif /* __LINUXBOOT_H__ */ diff --git a/firmware/linuxboot.c b/firmware/linuxboot.c index 5b6ab314b3..aa907ac7bb 
/* Linux's self-extracting kernels are broken on MIPS. The decompressor stub
 * doesn't flush caches after extracting the kernel code which can cause the
 * boot to fail horribly. This has been true since at least 2009 and at the
 * time of writing (2022) it's *still* broken.
 *
 * The FiiO M3K and Shanling Q1 both have broken kernels of this type, so we
 * work around this by replacing the direct call to the kernel entry point with
 * a thunk that adds the necessary cache flush.
 *
 * This helper finds the stub's jump to the kernel and decodes its target.
 *
 *  code_start  in:  start of the stub code, read as 32-bit instruction words
 *              out: on a non-zero return, the first instruction after the
 *                   "move aN, sN" sequence, where a 4-instruction patch may
 *                   be written. Left untouched when 0 is returned.
 *  code_size   size of the code buffer in bytes; a trailing partial word
 *              is ignored.
 *
 * Returns the decoded value of the jump register, or 0 if the expected
 * instruction pattern was not found or there is no room for the patch.
 * Decoding is heuristic, so callers must still sanity check the address.
 */
uint32_t mips_linux_stub_get_entry(void** code_start, size_t code_size)
{
    /* The jump to the kernel entry point looks like this:
     *
     *     move a0, s0
     *     move a1, s1
     *     move a2, s2
     *     move a3, s3
     *     ...
     *     la k0, KERNEL_ENTRY
     *     jr k0
     *     --- or in kernels since 2021: ---
     *     la t9, KERNEL_ENTRY
     *     jalr t9
     *
     * We're trying to identify this code and decode the kernel entry
     * point address, and return a suitable address where we can patch
     * in a call to our thunk.
     */

    /* We should only need to scan within the first 128 bytes
     * but do up to 256 just in case. */
    const size_t max_scan_words = 256 / sizeof(uint32_t);
    /* Number of instructions the caller's patch overwrites */
    const size_t patch_words = 4;

    uint32_t* code = *code_start;

    /* Only whole instruction words are considered, so we never read a
     * partial word at the end of the buffer. */
    const size_t total_words = code_size / sizeof(uint32_t);
    const size_t scan_words =
        total_words < max_scan_words ? total_words : max_scan_words;

    /* Scan for the "move aN, sN" sequence. move_idx ends up as the index
     * of the instruction following the last move; 0 means "not found".
     * Buffers too short to hold the sequence simply never enter the loop. */
    size_t move_idx = 0;
    for(size_t i = 4; i < scan_words; ++i) {
        if(code[i - 4] == 0x02002021 && /* move a0, s0 */
           code[i - 3] == 0x02202821 && /* move a1, s1 */
           code[i - 2] == 0x02403021 && /* move a2, s2 */
           code[i - 1] == 0x02603821) { /* move a3, s3 */
            move_idx = i;
            break;
        }
    }

    if(move_idx == 0)
        return 0;

    /* Now search forward for the next jr/jalr instruction */
    size_t jump_idx = scan_words;
    uint32_t jreg = 0;
    for(size_t i = move_idx; i < scan_words; ++i) {
        const uint32_t insn = code[i];
        if((insn & 0xfc1ff83f) == 0x0000f809 || /* jalr rN (rd = ra) */
           (insn & 0xfc00003f) == 0x00000008) { /* jr rN */
            jreg = (insn >> 21) & 0x1f;
            jump_idx = i;
            break;
        }
    }

    /* No jump found, or a jump through $zero which can't be decoded */
    if(jreg == 0)
        return 0;

    /* Need room here for 4 instructions. Assume everything between the
     * moves and the jump is safe to overwrite; otherwise, we'll need to
     * take a different approach.
     *
     * Count +1 instruction for the jump itself and another +1 for its
     * branch delay slot. */
    if(jump_idx - move_idx + 2 < patch_words)
        return 0;

    /* The delay slot may lie beyond the scan window; make sure the whole
     * patch region is still inside the buffer we were given. */
    if(move_idx + patch_words > total_words)
        return 0;

    /* Now scan from the end of the move sequence until the jump instruction
     * and try to reconstruct the entry address. We check for lui/ori/addiu
     * that target the jump register. */
    const uint32_t opreg_mask = 0xffff0000;
    const uint32_t lui = 0x3c000000 | (jreg << 16);
    const uint32_t ori = 0x34000000 | (jreg << 21) | (jreg << 16);
    const uint32_t addiu = 0x24000000 | (jreg << 21) | (jreg << 16);

    /* Can use any initial value here */
    uint32_t jreg_val = 0xdeadbeef;

    for(size_t i = move_idx; i < jump_idx; ++i) {
        const uint32_t insn = code[i];
        const uint32_t imm = insn & 0xffff;

        if((insn & opreg_mask) == lui) {
            jreg_val = imm << 16;
        } else if((insn & opreg_mask) == ori) {
            /* ori zero-extends its immediate */
            jreg_val |= imm;
        } else if((insn & opreg_mask) == addiu) {
            /* addiu sign-extends its immediate: "la" is commonly emitted
             * as lui %hi / addiu %lo, where %lo is a signed 16-bit value
             * and %hi has been adjusted to compensate. */
            jreg_val += (imm ^ 0x8000u) - 0x8000u;
        }
    }

    /* Success! Probably! */
    *code_start = &code[move_idx];
    return jreg_val;
}