From e232f69214cfe0ad83e8a6051c58ef54754e59bb Mon Sep 17 00:00:00 2001 From: Nikita Burnashev Date: Sun, 16 Apr 2023 12:46:55 +0300 Subject: [PATCH] atjboottool: gzipped fw files, option for big-endian fw, clarify ECIES in fwu Added fw modifications required to unpack real-world player dumps. Documented more fwu header fields, magic numbers and finite field arithmetic (extended Euclidean for inverse, long division for reducing modulo field_poly). The v3 encryption used is standard RC4 with the key additionally ciphered by the Elliptic Curve Integrated Encryption Scheme. Either sect233k1 (NIST K-233) or sect163r2 (NIST B-163) curves can be used, with the former overwhelmingly prevailing, being hardwired in SDK's maker.exe. Using a private/public key scheme is superfluous because both are stored in the firmware, with the added level of complexity likely serving the purpose of obfuscation. The private key is generated at random with each invocation. Neither the KDF nor the MAC from ECIES is used; the RC4 key is directly XORed with the shared secret. The random number r used to calculate rG isn't stored, but that's unimportant since only krG == rkG is actually used in the encryption. 
Change-Id: Ieacf8cc744bc90c7c5582dd724b2c10a41bfc191 --- utils/atj2137/atjboottool/Makefile | 2 +- utils/atj2137/atjboottool/atj_tables.c | 18 +- utils/atj2137/atjboottool/atj_tables.h | 20 +- utils/atj2137/atjboottool/atjboottool.c | 37 +- utils/atj2137/atjboottool/fw.c | 115 ++++- utils/atj2137/atjboottool/fw.h | 2 +- utils/atj2137/atjboottool/fwu.c | 622 +++++++++++++----------- 7 files changed, 499 insertions(+), 317 deletions(-) diff --git a/utils/atj2137/atjboottool/Makefile b/utils/atj2137/atjboottool/Makefile index 8d96bcdde0..5414b4810e 100644 --- a/utils/atj2137/atjboottool/Makefile +++ b/utils/atj2137/atjboottool/Makefile @@ -2,7 +2,7 @@ DEFINES= CC=gcc LD=gcc CFLAGS=-g -std=c99 -W -Wall $(DEFINES) -LDFLAGS= +LDFLAGS=-lz BINS=atjboottool all: $(BINS) diff --git a/utils/atj2137/atjboottool/atj_tables.c b/utils/atj2137/atjboottool/atj_tables.c index 579e17d52b..e2bd976bfe 100644 --- a/utils/atj2137/atjboottool/atj_tables.c +++ b/utils/atj2137/atjboottool/atj_tables.c @@ -20,7 +20,7 @@ ****************************************************************************/ #include -uint8_t g_check_block_A_table[1024] = +uint8_t g_decode_A_table[1024] = { 0x16, 0x2b, 0x01, 0xe4, 0x0e, 0x3d, 0xc1, 0xdf, 0x0f, 0x35, 0x8f, 0xf5, 0xe2, 0x48, 0xa0, 0x2e, 0x1c, 0x6a, 0x57, 0xea, 0x6d, 0x9a, 0xe2, 0x03, 0xec, 0xe8, @@ -109,45 +109,45 @@ uint8_t g_decode_B_table[20] = 0xf8, 0xb4, 0x36, 0x41, 0xc5, 0x51, 0xaf }; -uint32_t g_crypto_table[8] = +uint32_t g_sect233k1_G_x[8] = { 0xefad6126, 0x0a4c9d6e, 0x19c26bf5, 0x149563a4, 0x29f22ff4, 0x7e731af1, 0x32ba853a, 0x00000172 }; -uint32_t g_crypto_table2[8] = +uint32_t g_sect233k1_G_y[8] = { 0x56fae6a3, 0x56e0c110, 0xf18aeb9b, 0x27a8cd9b, 0x555a67c4, 0x19b7f70f, 0x537dece8, 0x000001db }; -uint32_t g_crypto_key6[8] = +uint32_t g_sect233k1_b[8] = { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -uint32_t g_crypto_key3[6] = +uint32_t g_sect163r2_G_x[6] = { 0xe8343e36, 0xd4994637, 
0xa0991168, 0x86a2d57e, 0xf0eba162, 0x00000003 }; -uint32_t g_crypto_key4[6] = +uint32_t g_sect163r2_G_y[6] = { 0x797324f1, 0xb11c5c0c, 0xa2cdd545, 0x71a0094f, 0xd51fbc6c, 0x00000000 }; -uint32_t g_atj_ec163_a[6] = +uint32_t g_sect163r2_a[6] = { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -uint32_t g_crypto_key5[6] = +uint32_t g_sect163r2_b[6] = { 0x4a3205fd, 0x512f7874, 0x1481eb10, 0xb8c953ca, 0x0a601907, 0x00000002 }; -uint32_t g_atj_ec233_a[8] = +uint32_t g_sect233k1_a[8] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; diff --git a/utils/atj2137/atjboottool/atj_tables.h b/utils/atj2137/atjboottool/atj_tables.h index d5eb4ae5d4..5ed75d6ac4 100644 --- a/utils/atj2137/atjboottool/atj_tables.h +++ b/utils/atj2137/atjboottool/atj_tables.h @@ -21,15 +21,15 @@ #ifndef __ATJ_TABLES__ #define __ATJ_TABLES__ -uint8_t g_check_block_A_table[1024]; -uint8_t g_decode_B_table[20]; -uint32_t g_crypto_table[8]; -uint32_t g_crypto_table2[8]; -uint32_t g_crypto_key6[8]; -uint32_t g_crypto_key3[6]; -uint32_t g_crypto_key4[6]; -uint32_t g_crypto_key5[6]; -uint32_t g_atj_ec233_a[8]; -uint32_t g_atj_ec163_a[6]; +extern uint8_t g_decode_A_table[1024]; +extern uint8_t g_decode_B_table[20]; +extern uint32_t g_sect233k1_G_x[8]; +extern uint32_t g_sect233k1_G_y[8]; +extern uint32_t g_sect233k1_b[8]; +extern uint32_t g_sect163r2_G_x[6]; +extern uint32_t g_sect163r2_G_y[6]; +extern uint32_t g_sect163r2_a[6]; +extern uint32_t g_sect163r2_b[6]; +extern uint32_t g_sect233k1_a[8]; #endif // __ATJ_TABLES__ diff --git a/utils/atj2137/atjboottool/atjboottool.c b/utils/atj2137/atjboottool/atjboottool.c index d0ad1b468b..b68ada980e 100644 --- a/utils/atj2137/atjboottool/atjboottool.c +++ b/utils/atj2137/atjboottool/atjboottool.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "misc.h" #include "fwu.h" #include "afi.h" @@ -100,7 +101,26 @@ static int unpack_afi_fw_cb(const char *filename, uint8_t *buf, size_t size) FILE *f = 
fopen(name, "wb"); if(f) { - fwrite(buf, size, 1, f); + if (0 != memcmp(buf, "\x1f\x8b\x8\0\0\0\0\0\0\xb", 10)) + fwrite(buf, size, 1, f); + else + { + uint8_t buf_out[8192]; + z_stream zs; + int err = Z_OK; + cprintf(GREEN, "inflating... "); + memset(&zs, 0, sizeof(zs)); + zs.next_in = buf + 10; + zs.avail_in = size - 10; + inflateInit2(&zs, -MAX_WBITS); /* raw */ + while (err == Z_OK) + { + zs.next_out = buf_out; + zs.avail_out = sizeof(buf_out); + err = inflate(&zs, Z_NO_FLUSH); + fwrite(buf_out, 1, sizeof(buf_out) - zs.avail_out, f); + } + } fclose(f); cprintf(RED, "Ok\n"); return 0; @@ -119,10 +139,10 @@ static int do_afi(uint8_t *buf, size_t size) return afi_unpack(buf, size, &unpack_afi_fw_cb); } -static int do_fw(uint8_t *buf, size_t size) +static int do_fw(uint8_t *buf, size_t size, bool big_endian) { build_out_prefix(".unpack", "", true); - return fw_unpack(buf, size, &unpack_afi_fw_cb); + return fw_unpack(buf, size, &unpack_afi_fw_cb, big_endian); } static void usage(void) { @@ -135,6 +155,7 @@ static void usage(void) printf(" --fwu Unpack a FWU firmware file\n"); printf(" --afi Unpack a AFI archive file\n"); printf(" --fw Unpack a FW archive file\n"); + printf(" --fw251 Big-endian FW archive used on Flip80251\n"); printf(" --atj2127 Force ATJ2127 decryption mode\n"); printf("The default is to try to guess the format.\n"); printf("If several formats are specified, all are tried.\n"); @@ -147,6 +168,7 @@ int main(int argc, char **argv) bool try_fwu = false; bool try_afi = false; bool try_fw = false; + bool big_endian = false; enum fwu_mode_t fwu_mode = FWU_AUTO; while(1) @@ -159,11 +181,12 @@ int main(int argc, char **argv) {"fwu", no_argument, 0, 'u'}, {"afi", no_argument, 0, 'a'}, {"fw", no_argument, 0, 'w'}, + {"fw251", no_argument, 0, 'b'}, {"atj2127", no_argument, 0, '2'}, {0, 0, 0, 0} }; - int c = getopt_long(argc, argv, "hdco:a2", long_options, NULL); + int c = getopt_long(argc, argv, "hdco:a2b", long_options, NULL); if(c == -1) break; switch(c) @@ 
-192,6 +215,10 @@ int main(int argc, char **argv) case 'w': try_fw = true; break; + case 'b': + try_fw = true; + big_endian = true; + break; case '2': fwu_mode = FWU_ATJ2127; break; @@ -238,7 +265,7 @@ int main(int argc, char **argv) else if(try_afi || afi_check(buf, size)) ret = do_afi(buf, size); else if(try_fw || fw_check(buf, size)) - ret = do_fw(buf, size); + ret = do_fw(buf, size, big_endian); else { cprintf(GREY, "No valid format found\n"); diff --git a/utils/atj2137/atjboottool/fw.c b/utils/atj2137/atjboottool/fw.c index 07ce4e2f31..114123813b 100644 --- a/utils/atj2137/atjboottool/fw.c +++ b/utils/atj2137/atjboottool/fw.c @@ -38,7 +38,7 @@ struct fw_entry_t uint16_t version; uint32_t block_offset; // offset shift by 9 uint32_t size; - uint32_t unk; + uint32_t bytes; uint32_t checksum; } __attribute__((packed)); @@ -78,7 +78,8 @@ struct fw_hdr_f0_t uint8_t sig[FW_SIG_SIZE]; uint8_t res[12]; uint32_t checksum; - uint8_t res2[492]; + uint8_t res2[490]; + uint16_t header_checksum; struct fw_entry_t entry[FW_ENTRIES]; } __attribute__((packed)); @@ -97,14 +98,56 @@ static void build_filename_fw(char buf[16], struct fw_entry_t *ent) { int pos = 0; for(int i = 0; i < 8 && ent->name[i] != ' '; i++) - buf[pos++] = ent->name[i]; + buf[pos++] = tolower(ent->name[i]); buf[pos++] = '.'; for(int i = 0; i < 3 && ent->ext[i] != ' '; i++) - buf[pos++] = ent->ext[i]; + buf[pos++] = tolower(ent->ext[i]); buf[pos] = 0; } -int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t unpack_cb) +static inline uint32_t u32_endian_swap(uint32_t u32) +{ + return ((u32 & 0xff000000u) >> 24) | + ((u32 & 0x00ff0000u) >> 8) | + ((u32 & 0x0000ff00u) << 8) | + ((u32 & 0x000000ffu) << 24); +} + +static uint32_t big_endian_checksum(void *ptr, size_t size) +{ + uint32_t crc = 0; + uint32_t *cp = ptr; + for(; size >= 4; size -= 4) + crc += u32_endian_swap(*cp++); + /* FIXME all observed sizes divisible by 4, unclear how to add remainder */ + return crc; +} + +static inline uint16_t 
u16_endian_swap(uint16_t u16) +{ + return ((u16 & 0xff00u) >> 8) | + ((u16 & 0x00ffu) << 8); +} + +static uint16_t lfi_header_checksum(void *ptr, size_t size, bool big_endian) +{ + uint16_t crc = 0; + uint16_t *cp = ptr; + if (big_endian) + { + for(; size >= 2; size -= 2) + crc += u16_endian_swap(*cp++); + return u16_endian_swap(crc); /* to make comparable with the stored one */ + } + else + { + for(; size >= 2; size -= 2) + crc += *cp++; + return crc; + } +} + +int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t unpack_cb, bool big_endian) { struct fw_hdr_t *hdr = (void *)buf; @@ -165,8 +208,32 @@ int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t unpack_cb) } else { - /* struct fw_hdr_f0_t *hdr_f0 = (void *)hdr; */ - cprintf(GREEN, " Header not dumped because format is unclear.\n"); + struct fw_hdr_f0_t *hdr_f0 = (void *)hdr; + uint32_t chk; + if (big_endian) + chk = u32_endian_swap(big_endian_checksum(buf + 0x200, 0x1e00)); + else + chk = afi_checksum(buf + 0x200, 0x1e00); + cprintf_field(" Directory checksum: ", "0x%x ", hdr_f0->checksum); + if(chk != hdr_f0->checksum) + { + cprintf(RED, "Mismatch, 0x%x expected\n", chk); + return 1; + } + else + cprintf(RED, "Ok\n"); + + uint16_t header_chk = lfi_header_checksum(buf, 510, big_endian); + cprintf_field(" Header checksum: ", "0x%x ", hdr_f0->header_checksum); + if(header_chk != hdr_f0->header_checksum) + { + cprintf(RED, "Mismatch, 0x%x expected\n", header_chk); + return 1; + } + else + cprintf(RED, "Ok\n"); + + cprintf(GREEN, " Rest of header not dumped because format is unclear.\n"); } cprintf(BLUE, "Entries\n"); @@ -175,15 +242,29 @@ int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t unpack_cb) if(hdr->entry[i].name[0] == 0) continue; struct fw_entry_t *entry = &hdr->entry[i]; + if (big_endian) + { + /* must be in-place for correct load checksum later */ + entry->block_offset = u32_endian_swap(entry->block_offset); + entry->size = u32_endian_swap(entry->size); + 
entry->checksum = u32_endian_swap(entry->checksum); + } char filename[16]; build_filename_fw(filename, entry); cprintf(RED, " %s\n", filename); cprintf_field(" Attr: ", "%02x\n", entry->attr); cprintf_field(" Offset: ", "0x%x\n", entry->block_offset << 9); cprintf_field(" Size: ", "0x%x\n", entry->size); - cprintf_field(" Unknown: ", "%x\n", entry->unk); + cprintf_field(" Bytes: ", "0x%x\n", entry->bytes); cprintf_field(" Checksum: ", "0x%x ", entry->checksum); - uint32_t chk = afi_checksum(buf + (entry->block_offset << 9), entry->size); + if (entry->bytes == 0) + entry->bytes = entry->size; + memset(buf + (entry->block_offset << 9) + entry->bytes, 0, entry->size - entry->bytes); + uint32_t chk; + if (big_endian) + chk = big_endian_checksum(buf + (entry->block_offset << 9), entry->size); + else + chk = afi_checksum(buf + (entry->block_offset << 9), entry->size); if(chk != entry->checksum) { cprintf(RED, "Mismatch\n"); @@ -191,11 +272,25 @@ int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t unpack_cb) } else cprintf(RED, "Ok\n"); - int ret = unpack_cb(filename, buf + (entry->block_offset << 9), entry->size); + int ret = unpack_cb(filename, buf + (entry->block_offset << 9), entry->bytes); if(ret != 0) return ret; } + if (big_endian) + { + uint32_t load_checksum = *(uint32_t *)(buf + size - 4); + uint32_t load_chk = big_endian_checksum(buf, size - 512); + cprintf_field(" Load checksum: ", "0x%x ", load_checksum); + if(load_chk != load_checksum) + { + cprintf(RED, "Mismatch, 0x%x expected\n", load_chk); + return 1; + } + else + cprintf(RED, "Ok\n"); + } + return 0; } diff --git a/utils/atj2137/atjboottool/fw.h b/utils/atj2137/atjboottool/fw.h index 95f8087116..9047de4b6d 100644 --- a/utils/atj2137/atjboottool/fw.h +++ b/utils/atj2137/atjboottool/fw.h @@ -27,7 +27,7 @@ * its name and content. If the callback returns a nonzero value, the function will stop and return * that value. 
Returns 0 on success */ typedef int (*fw_extract_callback_t)(const char *name, uint8_t *buf, size_t size); -int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t cb); +int fw_unpack(uint8_t *buf, size_t size, fw_extract_callback_t cb, bool big_endian); /* Check if a file looks like an AFI file */ bool fw_check(uint8_t *buf, size_t size); diff --git a/utils/atj2137/atjboottool/fwu.c b/utils/atj2137/atjboottool/fwu.c index 4d09dd876e..c9f6c440fd 100644 --- a/utils/atj2137/atjboottool/fwu.c +++ b/utils/atj2137/atjboottool/fwu.c @@ -59,6 +59,46 @@ struct fwu_crypto_hdr_t uint8_t key[32]; } __attribute__((packed)); +struct fwu_sector0_tail_t +{ + uint8_t unk_2; + uint32_t unk_x808; + uint32_t unk_8; + uint8_t key_B[16]; + uint8_t guid[16]; + uint8_t unk_190; + uint8_t super_secret_xor[16]; + uint8_t timestamp[8]; + uint8_t unk_0; + uint8_t guid_filler[20]; + uint8_t unk_1; + uint8_t check[20]; +} __attribute__((packed)); + +struct fwu_block_A_hdr_t +{ + uint16_t block_A_size; + uint8_t unk_0_a; + uint8_t unk_1_a; + uint8_t key_B[16]; + uint8_t guid_filler[256]; + uint8_t ec_sz; + uint8_t unk_0_b; + uint32_t unk_5; + uint32_t unk_x505; + uint16_t unk_1_b; + uint8_t timestamp[8]; +} __attribute__((packed)); + +struct fwu_block_B_hdr_t +{ + uint16_t block_B_size; + uint8_t unk_1_a; + uint16_t unk_1_b; + uint8_t timestamp[8]; + uint16_t guid_filler_size; +} __attribute__((packed)); + struct fwu_tail_t { uint8_t length; /* in blocks? 
it's always 1 */ @@ -95,41 +135,32 @@ typedef struct ec_point_t uint32_t *y; }ec_point_t; -struct block_A_info_t +struct ec_info_t { int nr_bits; - uint16_t field_2; - int nr_words; - int nr_dwords_x12; + int point_size; uint32_t *ec_a; // size - uint32_t *ptr7; // size + uint32_t *ec_b; // size uint32_t *field_poly; // size uint32_t size; - uint32_t field_1C; - ec_point_t ptr1; - uint32_t *ptr3; // size - uint32_t *ptr4; // size - int nr_words2; + ec_point_t pt_G; + ec_point_t pt_kG; // calculated ECIES public key uint32_t field_bits; - int nr_dwords_x8; + int size_x2; int nr_bytes; - int nr_bytes2; int nr_dwords_m1; - int nr_dwords_x2_m1; int nr_dwords_x2; + int nr_dwords_x2_m1; int nr_dwords; - uint32_t field_54; - uint32_t field_58; }; -struct block_A_info_t g_decode_A_info; -uint8_t g_subblock_A[0x128]; +struct ec_info_t g_ec_info; +struct fwu_block_A_hdr_t g_subblock_A; uint8_t g_key_B[20]; -uint8_t g_perm_B[258]; -uint8_t g_crypto_info_byte; -uint8_t *g_decode_buffer; -uint8_t *g_decode_buffer2; -void *g_decode_buffer3; +uint8_t g_rc4_S[258]; +uint8_t g_field_sz_byte; +ec_point_t g_public_key; // from block A +uint32_t *g_private_key; // from block B #include "atj_tables.h" #include @@ -186,7 +217,7 @@ int get_version(uint8_t *buf, unsigned long size) static int decode_block_A(uint8_t block[1020]) { - uint8_t *p = &g_check_block_A_table[32 * (block[998] & 0x1f)]; + uint8_t *p = &g_decode_A_table[32 * (block[998] & 31)]; uint8_t key[32]; for(int i = 0; i < 20; i++) @@ -197,50 +228,53 @@ static int decode_block_A(uint8_t block[1020]) for(int i = 20; i < 32; i++) key[i] = key[i - 20]; - for(int i = 0; i < 992; i++) - block[i] ^= key[i % 32] ^ g_check_block_A_table[i]; + for(int i = 0; i < 31 * 32; i++) + block[i] ^= key[i % 32] ^ g_decode_A_table[i]; + // FIXME dereferencing block - 1 is undefined behavior in standard C return check_block(block - 1, block + 1000, 1001); } -static void compute_perm(uint8_t *keybuf, size_t size, uint8_t perm[258]) +// 
https://en.wikipedia.org/wiki/RC4#Key-scheduling_algorithm_(KSA) +static void rc4_key_schedule(uint8_t *key, size_t keylength, uint8_t S[258]) { for(int i = 0; i < 256; i++) - perm[i] = i; - perm[256] = perm[257] = 0; - uint8_t idx = 0; + S[i] = i; + S[256] = S[257] = 0; + uint8_t j = 0; for(int i = 0; i < 256; i++) { - uint8_t v = perm[i]; - idx = (v + keybuf[i % size] + idx) % 256; - perm[i] = perm[idx]; - perm[idx] = v; + j = (j + S[i] + key[i % keylength]) % 256; + uint8_t tmp = S[i]; + S[i] = S[j]; + S[j] = tmp; } } -static void decode_perm(uint8_t *buf, size_t size, uint8_t perm[258]) +// https://en.wikipedia.org/wiki/RC4#Pseudo-random_generation_algorithm_(PRGA) +static void rc4_stream_cipher(uint8_t *buf, size_t size, uint8_t S[258]) { - uint8_t idxa = perm[256]; - uint8_t idxb = perm[257]; - for(size_t i = 0; i < size; i++) + uint8_t i = S[256]; + uint8_t j = S[257]; + for(size_t k = 0; k < size; k++) { - idxa = (idxa + 1) % 256; - uint8_t v = perm[idxa]; - idxb = (idxb + v) % 256; - perm[idxa] = perm[idxb]; - perm[idxb] = v; - buf[i] ^= perm[(v + perm[idxa]) % 256]; + i = (i + 1) % 256; + j = (j + S[i]) % 256; + uint8_t tmp = S[i]; + S[i] = S[j]; + S[j] = tmp; + buf[k] ^= S[(S[i] + S[j]) % 256]; } } -static void decode_block_with_perm(uint8_t *keybuf, int keysize, - uint8_t *buf, int bufsize, uint8_t perm[258]) +static void rc4_cipher_block(uint8_t *keybuf, int keysize, + uint8_t *buf, int bufsize, uint8_t S[258]) { - compute_perm(keybuf, keysize, perm); - decode_perm(buf, bufsize, perm); + rc4_key_schedule(keybuf, keysize, S); + rc4_stream_cipher(buf, bufsize, S); } -static void apply_perm(uint8_t *inbuf, uint8_t *outbuf, size_t size, int swap) +static void rc4_key_swap(uint8_t *inbuf, uint8_t *outbuf, size_t size, int swap) { memcpy(outbuf, inbuf, size); int a = swap & 0xf; @@ -250,16 +284,16 @@ static void apply_perm(uint8_t *inbuf, uint8_t *outbuf, size_t size, int swap) outbuf[b] = v; } -static void decode_block_with_swap(uint8_t keybuf[32], int 
swap, - uint8_t *buf, int bufsize, uint8_t perm[258]) +static void rc4_key_swap_and_decode(uint8_t keybuf[32], int swap, + uint8_t *buf, int bufsize, uint8_t S[258]) { uint8_t keybuf_interm[32]; - apply_perm(keybuf, keybuf_interm, 32, swap); - decode_block_with_perm(keybuf_interm, 32, buf, bufsize, perm); + rc4_key_swap(keybuf, keybuf_interm, 32, swap); + rc4_cipher_block(keybuf_interm, 32, buf, bufsize, S); } -static void clear_memory(void *buf, size_t size_dwords) +static void gf_zero(void *buf, size_t size_dwords) { memset(buf, 0, 4 * size_dwords); } @@ -269,40 +303,35 @@ static void set_bit(int bit_pos, uint32_t *buf) buf[bit_pos / 32] |= 1 << (bit_pos % 32); } -static int fill_decode_info(uint8_t sz) +static int fill_ec_info(uint8_t sz) { if(sz == 2) sz = 233; else if(sz == 3) sz = 163; else return 1; - g_decode_A_info.nr_bits = sz; - g_decode_A_info.nr_bytes2 = sz / 8 + (sz % 8 != 0); - g_decode_A_info.nr_words = 2 * g_decode_A_info.nr_bytes2; - g_decode_A_info.nr_bytes = sz / 8 + (sz % 8 != 0); - g_decode_A_info.nr_words2 = 2 * g_decode_A_info.nr_bytes2; - g_decode_A_info.nr_dwords = sz / 32 + (sz % 32 != 0); - g_decode_A_info.size = 4 * g_decode_A_info.nr_dwords; - g_decode_A_info.nr_dwords_x8 = 8 * g_decode_A_info.nr_dwords; - g_decode_A_info.nr_dwords_m1 = g_decode_A_info.nr_dwords - 1; - g_decode_A_info.nr_dwords_x2 = 2 * g_decode_A_info.nr_dwords; - g_decode_A_info.nr_dwords_x2_m1 = g_decode_A_info.nr_dwords_x2 - 1; - g_decode_A_info.nr_dwords_x12 = 12 * g_decode_A_info.nr_dwords; - g_decode_A_info.ptr1.x = malloc(4 * g_decode_A_info.nr_dwords); - g_decode_A_info.ptr1.y = malloc(g_decode_A_info.size); - g_decode_A_info.ptr3 = malloc(g_decode_A_info.size); - g_decode_A_info.ptr4 = malloc(g_decode_A_info.size); - g_decode_A_info.field_poly = malloc(g_decode_A_info.size); - g_decode_A_info.ec_a = malloc(g_decode_A_info.size); - g_decode_A_info.ptr7 = malloc(g_decode_A_info.size); + g_ec_info.nr_bits = sz; + g_ec_info.nr_bytes = sz / 8 + (sz % 8 != 0); + 
g_ec_info.point_size = 2 * g_ec_info.nr_bytes; + g_ec_info.nr_dwords = sz / 32 + (sz % 32 != 0); + g_ec_info.size = 4 * g_ec_info.nr_dwords; + g_ec_info.size_x2 = 8 * g_ec_info.nr_dwords; + g_ec_info.nr_dwords_m1 = g_ec_info.nr_dwords - 1; + g_ec_info.nr_dwords_x2 = 2 * g_ec_info.nr_dwords; + g_ec_info.nr_dwords_x2_m1 = g_ec_info.nr_dwords_x2 - 1; + g_ec_info.pt_G.x = malloc(4 * g_ec_info.nr_dwords); + g_ec_info.pt_G.y = malloc(g_ec_info.size); + g_ec_info.pt_kG.x = malloc(g_ec_info.size); + g_ec_info.pt_kG.y = malloc(g_ec_info.size); + g_ec_info.field_poly = malloc(g_ec_info.size); + g_ec_info.ec_a = malloc(g_ec_info.size); + g_ec_info.ec_b = malloc(g_ec_info.size); - cprintf(BLUE, " Decode Info:\n"); - cprintf_field(" Nr Bits: ", "%d\n", g_decode_A_info.nr_bits); - cprintf_field(" Nr Bytes: ", "%d\n", g_decode_A_info.nr_bytes); - cprintf_field(" Nr Bytes 2: ", "%d\n", g_decode_A_info.nr_bytes2); - cprintf_field(" Nr Words: ", "%d\n", g_decode_A_info.nr_words); - cprintf_field(" Nr Words 2: ", "%d\n", g_decode_A_info.nr_words2); - cprintf_field(" Nr DWords: ", "%d\n", g_decode_A_info.nr_dwords); - cprintf_field(" Size: ", "%d\n", g_decode_A_info.size); + cprintf(BLUE, " Elliptic curve info:\n"); + cprintf_field(" Field Bits: ", "%d\n", g_ec_info.nr_bits); + cprintf_field(" Field Bytes: ", "%d\n", g_ec_info.nr_bytes); + cprintf_field(" Point Size: ", "%d\n", g_ec_info.point_size); + cprintf_field(" Field DWords: ", "%d\n", g_ec_info.nr_dwords); + cprintf_field(" Size: ", "%d\n", g_ec_info.size); return 0; } @@ -313,29 +342,31 @@ static int process_block_A(uint8_t block[1024]) int ret = decode_block_A(block + 4); cprintf(GREEN, " Check: "); check_field(ret, 0, "Pass\n", "Fail\n"); - print_hex("BlockA", block, 1024); + // print_hex("BlockA", block, 1024); - memcpy(g_subblock_A, block, sizeof(g_subblock_A)); - ret = fill_decode_info(g_subblock_A[276]); + memcpy(&g_subblock_A, block, sizeof(g_subblock_A)); + // assert(offsetof(struct fwu_block_A_hdr_t, ec_sz) == 276); 
+ ret = fill_ec_info(g_subblock_A.ec_sz); cprintf(GREEN, " Info: "); check_field(ret, 0, "Pass\n", "Fail\n"); - int tmp = 2 * g_decode_A_info.nr_bytes2 + 38; + int tmp = 2 * g_ec_info.nr_bytes + 38; int offset = 1004 - tmp + 5; - g_crypto_info_byte = block[offset - 1]; - g_decode_buffer = malloc(g_decode_A_info.size); - g_decode_buffer2 = malloc(g_decode_A_info.size); + g_field_sz_byte = block[offset - 1]; + g_public_key.x = malloc(g_ec_info.size); + g_public_key.y = malloc(g_ec_info.size); - memset(g_decode_buffer, 0, g_decode_A_info.size); - memset(g_decode_buffer2, 0, g_decode_A_info.size); + memset(g_public_key.x, 0, g_ec_info.size); + memset(g_public_key.y, 0, g_ec_info.size); - memcpy(g_decode_buffer, &block[offset], g_decode_A_info.nr_bytes2); - int offset2 = g_decode_A_info.nr_bytes2 + offset; - memcpy(g_decode_buffer2, &block[offset2], g_decode_A_info.nr_bytes2); + memcpy(g_public_key.x, &block[offset], g_ec_info.nr_bytes); + int offset2 = g_ec_info.nr_bytes + offset; + memcpy(g_public_key.y, &block[offset2], g_ec_info.nr_bytes); - cprintf_field(" Word: ", "%d ", *(uint16_t *)&g_subblock_A[286]); - check_field(*(uint16_t *)&g_subblock_A[286], 1, "Ok\n", "Mismatch\n"); + // assert(offsetof(struct fwu_block_A_hdr_t, unk_1_b) == 286); + cprintf_field(" Word: ", "%d ", g_subblock_A.unk_1_b); + check_field(g_subblock_A.unk_1_b, 1, "Ok\n", "Mismatch\n"); return 0; } @@ -353,12 +384,12 @@ static void decode_key_B(uint8_t buf[20], uint8_t buf2[16], uint8_t key[20]) static void decode_block_B(uint8_t *buf, uint8_t key[16], size_t size) { decode_key_B(&buf[size], key, g_key_B); - decode_block_with_perm(g_key_B, 20, buf, size, g_perm_B); + rc4_cipher_block(g_key_B, 20, buf, size, g_rc4_S); } static int find_last_bit_set(uint32_t *buf, bool a) { - int i = a ? g_decode_A_info.nr_dwords_m1 : g_decode_A_info.nr_dwords_x2_m1; + int i = a ? 
g_ec_info.nr_dwords_m1 : g_ec_info.nr_dwords_x2_m1; while(i >= 0 && buf[i] == 0) i--; @@ -370,15 +401,15 @@ static int find_last_bit_set(uint32_t *buf, bool a) return -1; // unreachable } -static void copy_memory(uint32_t *to, uint32_t *from) +static void gf_copy(uint32_t *to, uint32_t *from) { - for(int i = 0; i < g_decode_A_info.nr_dwords; i++) + for(int i = 0; i < g_ec_info.nr_dwords; i++) to[i] = from[i]; } -static void swap_memory(uint32_t *a, uint32_t *b) +static void gf_swap(uint32_t *a, uint32_t *b) { - for(int i = 0; i < g_decode_A_info.nr_dwords; i++) + for(int i = 0; i < g_ec_info.nr_dwords; i++) { uint32_t c = a[i]; a[i] = b[i]; @@ -388,11 +419,11 @@ static void swap_memory(uint32_t *a, uint32_t *b) static void shift_left(uint32_t *buf, int nr_bits) { - for(int i = g_decode_A_info.nr_dwords_m1; i >= 0; i--) + for(int i = g_ec_info.nr_dwords_m1; i >= 0; i--) buf[i + (nr_bits / 32)] = buf[i]; memset(buf, 0, 4 * (nr_bits / 32)); - size_t size = g_decode_A_info.nr_dwords + (nr_bits + 31) / 32; + size_t size = g_ec_info.nr_dwords + (nr_bits + 31) / 32; nr_bits = nr_bits % 32; uint32_t acc = 0; @@ -407,9 +438,9 @@ static void shift_left(uint32_t *buf, int nr_bits) } } -static void xor_big(uint32_t *res, uint32_t *a, uint32_t *b) +static void gf_add_x2(uint32_t *res, uint32_t *a, uint32_t *b) { - for(int i = 0; i < g_decode_A_info.nr_dwords_x2; i++) + for(int i = 0; i < g_ec_info.nr_dwords_x2; i++) res[i] = a[i] ^ b[i]; } @@ -433,39 +464,49 @@ static void print_poly(const char *name, uint32_t *poly, int nr_dwords) cprintf(OFF, "\n"); } -static void gf_inverse(uint32_t *res, uint32_t *val) +/* https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Simple_algebraic_field_extensions + * invariant: p * s + a * t == r -> a * t == r (mod p) + * loop until only lowest bit set (r == 1) -> inverse in t */ +static void gf_inverse(uint32_t *newt, uint32_t *val) { - uint32_t *tmp = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *copy = 
malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *copy_arg = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *tmp2 = malloc(g_decode_A_info.nr_dwords_x8); - clear_memory(tmp, g_decode_A_info.nr_dwords_x2); - clear_memory(res, g_decode_A_info.nr_dwords); - *res = 1; - clear_memory(tmp2, g_decode_A_info.nr_dwords); - copy_memory(copy_arg, val); - copy_memory(copy, (uint32_t *)g_decode_A_info.field_poly); + uint32_t *tmp = malloc(g_ec_info.size_x2); + uint32_t *r = malloc(g_ec_info.size_x2); + uint32_t *newr = malloc(g_ec_info.size_x2); + uint32_t *t = malloc(g_ec_info.size_x2); + gf_zero(tmp, g_ec_info.nr_dwords_x2); + /* newt := 1 */ + gf_zero(newt, g_ec_info.nr_dwords); + *newt = 1; + /* t := 0 */ + gf_zero(t, g_ec_info.nr_dwords); + /* newr := a */ + gf_copy(newr, val); + /* r := p */ + gf_copy(r, g_ec_info.field_poly); - for(int i = find_last_bit_set(copy_arg, 1); i; i = find_last_bit_set(copy_arg, 1)) + for(int i = find_last_bit_set(newr, 1); i; i = find_last_bit_set(newr, 1)) { - int pos = i - find_last_bit_set(copy, 1); + /* pos := degree(newr) - degree(r) */ + int pos = i - find_last_bit_set(r, 1); if(pos < 0) { - swap_memory(copy_arg, copy); - swap_memory(res, tmp2); + gf_swap(newr, r); + gf_swap(newt, t); pos = -pos; } - copy_memory(tmp, copy); + /* newr := newr - x^pos * r */ + gf_copy(tmp, r); shift_left(tmp, pos); - xor_big(copy_arg, copy_arg, tmp); - copy_memory(tmp, tmp2); + gf_add_x2(newr, newr, tmp); + /* newt := newt - x^pos * t */ + gf_copy(tmp, t); shift_left(tmp, pos); - xor_big(res, res, tmp); + gf_add_x2(newt, newt, tmp); } free(tmp); - free(copy); - free(copy_arg); - free(tmp2); + free(r); + free(newr); + free(t); } static void shift_left_one(uint32_t *a) @@ -488,15 +529,15 @@ static void shift_left_one(uint32_t *a) #if 1 static void gf_mult(uint32_t *res, uint32_t *a2, uint32_t *a3) { - uint32_t *tmp2 = malloc(g_decode_A_info.nr_dwords_x8); - clear_memory(tmp2, g_decode_A_info.nr_dwords_x2); - copy_memory(tmp2, a3); + uint32_t *tmp2 = 
malloc(g_ec_info.size_x2); + gf_zero(tmp2, g_ec_info.nr_dwords_x2); + gf_copy(tmp2, a3); - int pos = g_decode_A_info.nr_dwords; + int pos = g_ec_info.nr_dwords; uint32_t mask = 1; for(int i = 0; i < 32; i++) { - for(int j = 0; j < g_decode_A_info.nr_dwords; j++) + for(int j = 0; j < g_ec_info.nr_dwords; j++) { if(a2[j] & mask) for(int k = 0; k < pos; k++) @@ -511,8 +552,8 @@ static void gf_mult(uint32_t *res, uint32_t *a2, uint32_t *a3) #else static void gf_mult(uint32_t *res, uint32_t *a2, uint32_t *a3) { - for(int i = 0; i < 32 * g_decode_A_info.nr_dwords; i++) - for(int j = 0; j < 32 * g_decode_A_info.nr_dwords; j++) + for(int i = 0; i < 32 * g_ec_info.nr_dwords; i++) + for(int j = 0; j < 32 * g_ec_info.nr_dwords; j++) { int k = i + j; uint32_t v1 = (a2[i / 32] >> (i % 32)) & 1; @@ -522,32 +563,35 @@ static void gf_mult(uint32_t *res, uint32_t *a2, uint32_t *a3) } #endif -static void gf_mod(uint32_t *inout, uint32_t *other) +// https://en.wikipedia.org/wiki/Polynomial_long_division#Pseudocode +static void gf_mod(uint32_t *r, uint32_t *field_poly) { - uint32_t *tmp = malloc(g_decode_A_info.nr_dwords_x8); - int v4 = g_decode_A_info.field_bits; - int pos = find_last_bit_set(inout, 0); - for(int i = pos - v4; i >= 0; i = find_last_bit_set(inout, 0) - v4) + uint32_t *tmp = malloc(g_ec_info.size_x2); + int deg_d = g_ec_info.field_bits; + int deg_r = find_last_bit_set(r, 0); + /* i := degree(lead(r) / lead(d)) */ + for(int i = deg_r - deg_d; i >= 0; i = find_last_bit_set(r, 0) - deg_d) { - clear_memory(tmp, g_decode_A_info.nr_dwords_x2); - copy_memory(tmp, other); + /* r := r - x^i * d */ + gf_zero(tmp, g_ec_info.nr_dwords_x2); + gf_copy(tmp, field_poly); shift_left(tmp, i); - xor_big(inout, inout, tmp); + gf_add_x2(r, r, tmp); } free(tmp); } static void gf_add(uint32_t *res, uint32_t *a, uint32_t *b) { - for(int i = 0; i < g_decode_A_info.nr_dwords; i++) + for(int i = 0; i < g_ec_info.nr_dwords; i++) res[i] = a[i] ^ b[i]; } static void print_point(const char *name, 
ec_point_t *ptr) { cprintf(BLUE, "%s\n", name); - print_poly(" x: ", ptr->x, g_decode_A_info.nr_dwords); - print_poly(" y: ", ptr->y, g_decode_A_info.nr_dwords); + print_poly(" x: ", ptr->x, g_ec_info.nr_dwords); + print_poly(" y: ", ptr->y, g_ec_info.nr_dwords); } static uint32_t g_gf_one[9] = @@ -557,42 +601,42 @@ static uint32_t g_gf_one[9] = static void ec_double(ec_point_t *point, ec_point_t *res) { - uint32_t *v2 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v3 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v4 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v5 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v6 = malloc(g_decode_A_info.nr_dwords_x8); - clear_memory(res->x, g_decode_A_info.nr_dwords); - clear_memory(res->y, g_decode_A_info.nr_dwords); - clear_memory(v3, g_decode_A_info.nr_dwords_x2); - clear_memory(v6, g_decode_A_info.nr_dwords_x2); - clear_memory(v4, g_decode_A_info.nr_dwords_x2); + uint32_t *v2 = malloc(g_ec_info.size_x2); + uint32_t *v3 = malloc(g_ec_info.size_x2); + uint32_t *v4 = malloc(g_ec_info.size_x2); + uint32_t *v5 = malloc(g_ec_info.size_x2); + uint32_t *v6 = malloc(g_ec_info.size_x2); + gf_zero(res->x, g_ec_info.nr_dwords); + gf_zero(res->y, g_ec_info.nr_dwords); + gf_zero(v3, g_ec_info.nr_dwords_x2); + gf_zero(v6, g_ec_info.nr_dwords_x2); + gf_zero(v4, g_ec_info.nr_dwords_x2); /* v4 := 1/x */ gf_inverse(v4, point->x); - clear_memory(v5, g_decode_A_info.nr_dwords_x2); + gf_zero(v5, g_ec_info.nr_dwords_x2); /* v5 := y/x */ gf_mult(v5, v4, point->y); - gf_mod(v5, g_decode_A_info.field_poly); + gf_mod(v5, g_ec_info.field_poly); /* v2 := x + y/x (lambda) */ gf_add(v2, point->x, v5); /* v4 := ec_a + lambda */ - gf_add(v4, v2, g_decode_A_info.ec_a); - clear_memory(v3, g_decode_A_info.nr_dwords_x2); + gf_add(v4, v2, g_ec_info.ec_a); + gf_zero(v3, g_ec_info.nr_dwords_x2); /* v3 := lambda^2 */ gf_mult(v3, v2, v2); - gf_mod(v3, g_decode_A_info.field_poly); + gf_mod(v3, g_ec_info.field_poly); /* x' := lambda + lambda^2 + 
ec_a */ gf_add(res->x, v4, v3); - clear_memory(v5, g_decode_A_info.nr_dwords_x2); + gf_zero(v5, g_ec_info.nr_dwords_x2); /* v4 := lambda + g_gf_one */ gf_add(v4, v2, g_gf_one); /* v5 := (lambda + 1) * x' = lambda.x' + x' */ gf_mult(v5, v4, res->x); - gf_mod(v5, g_decode_A_info.field_poly); - clear_memory(v6, g_decode_A_info.nr_dwords_x2); + gf_mod(v5, g_ec_info.field_poly); + gf_zero(v6, g_ec_info.nr_dwords_x2); /* v6 := x1^2 */ gf_mult(v6, point->x, point->x); - gf_mod(v6, g_decode_A_info.field_poly); + gf_mod(v6, g_ec_info.field_poly); /* y' = (lambda + g_gf_one) * x + x^2 = x^2 + lambda.x + x */ gf_add(res->y, v5, v6); free(v2); @@ -604,31 +648,31 @@ static void ec_double(ec_point_t *point, ec_point_t *res) static void ec_add(ec_point_t *a1, ec_point_t *a2, ec_point_t *res) { - uint32_t *v3 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v4 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v5 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v6 = malloc(g_decode_A_info.nr_dwords_x8); - uint32_t *v7 = malloc(g_decode_A_info.nr_dwords_x8); - clear_memory(res->x, g_decode_A_info.nr_dwords); - clear_memory(res->y, g_decode_A_info.nr_dwords); - clear_memory(v4, g_decode_A_info.nr_dwords_x2); - clear_memory(v7, g_decode_A_info.nr_dwords_x2); + uint32_t *v3 = malloc(g_ec_info.size_x2); + uint32_t *v4 = malloc(g_ec_info.size_x2); + uint32_t *v5 = malloc(g_ec_info.size_x2); + uint32_t *v6 = malloc(g_ec_info.size_x2); + uint32_t *v7 = malloc(g_ec_info.size_x2); + gf_zero(res->x, g_ec_info.nr_dwords); + gf_zero(res->y, g_ec_info.nr_dwords); + gf_zero(v4, g_ec_info.nr_dwords_x2); + gf_zero(v7, g_ec_info.nr_dwords_x2); /* v5 = y1 + y2 */ gf_add(v5, a1->y, a2->y); /* v6 = x1 + x2 */ gf_add(v6, a1->x, a2->x); /* v7 = 1/(x1 + x2) */ gf_inverse(v7, v6); - clear_memory(v3, g_decode_A_info.nr_dwords_x2); + gf_zero(v3, g_ec_info.nr_dwords_x2); /* v3 = (y1 + y2) / (x1 + x2) (lambda) */ gf_mult(v3, v7, v5); - gf_mod(v3, g_decode_A_info.field_poly); + gf_mod(v3, 
g_ec_info.field_poly); /* v5 = lambda + ec_a */ - gf_add(v5, v3, g_decode_A_info.ec_a); - clear_memory(v4, g_decode_A_info.nr_dwords_x2); + gf_add(v5, v3, g_ec_info.ec_a); + gf_zero(v4, g_ec_info.nr_dwords_x2); /* v4 = lambda^2 */ gf_mult(v4, v3, v3); - gf_mod(v4, g_decode_A_info.field_poly); + gf_mod(v4, g_ec_info.field_poly); /* v7 = lambda^2 + lambda + ec_a */ gf_add(v7, v5, v4); /* x' = ec_a + x1 + x2 + lambda + lambda^2 */ @@ -637,10 +681,10 @@ static void ec_add(ec_point_t *a1, ec_point_t *a2, ec_point_t *res) gf_add(v5, a1->x, res->x); /* v6 = x' + y1 */ gf_add(v6, res->x, a1->y); - clear_memory(v7, g_decode_A_info.nr_dwords_x2); + gf_zero(v7, g_ec_info.nr_dwords_x2); /* v7 = (x1 + x').lambda */ gf_mult(v7, v5, v3); - gf_mod(v7, g_decode_A_info.field_poly); + gf_mod(v7, g_ec_info.field_poly); /* y' = (x1 + x').lambda + x' + y1 */ gf_add(res->y, v7, v6); free(v3); @@ -654,17 +698,17 @@ static int ec_mult(uint32_t *n, ec_point_t *point, ec_point_t *res) { ec_point_t res_others; - res_others.x = malloc(g_decode_A_info.size); - res_others.y = malloc(g_decode_A_info.size); - clear_memory(res->x, g_decode_A_info.nr_dwords); - clear_memory(res->y, g_decode_A_info.nr_dwords); - clear_memory(res_others.x, g_decode_A_info.nr_dwords); - clear_memory(res_others.y, g_decode_A_info.nr_dwords); + res_others.x = malloc(g_ec_info.size); + res_others.y = malloc(g_ec_info.size); + gf_zero(res->x, g_ec_info.nr_dwords); + gf_zero(res->y, g_ec_info.nr_dwords); + gf_zero(res_others.x, g_ec_info.nr_dwords); + gf_zero(res_others.y, g_ec_info.nr_dwords); int pos = find_last_bit_set(n, 1); /* res_other := point */ - copy_memory(res_others.x, point->x); - copy_memory(res_others.y, point->y); + gf_copy(res_others.x, point->x); + gf_copy(res_others.y, point->y); /* for all bit from SZ-1 downto 0 */ for(int bit = (pos % 32) - 1; bit >= 0; bit--) @@ -672,15 +716,15 @@ static int ec_mult(uint32_t *n, ec_point_t *point, ec_point_t *res) /* res := 2 * res_other */ ec_double(&res_others, res); 
/* res_other := res = 2 * res_other */ - copy_memory(res_others.x, res->x); - copy_memory(res_others.y, res->y); + gf_copy(res_others.x, res->x); + gf_copy(res_others.y, res->y); /* if bit of n is set */ if(n[pos / 32] & (1 << bit)) { /* res := res_other + point */ ec_add(&res_others, point, res); - copy_memory(res_others.x, res->x); - copy_memory(res_others.y, res->y); + gf_copy(res_others.x, res->x); + gf_copy(res_others.y, res->y); } } /* same but optimized */ @@ -689,18 +733,18 @@ static int ec_mult(uint32_t *n, ec_point_t *point, ec_point_t *res) for(int bit = 31; bit >= 0; bit--) { ec_double(&res_others, res); - copy_memory(res_others.x, res->x); - copy_memory(res_others.y, res->y); + gf_copy(res_others.x, res->x); + gf_copy(res_others.y, res->y); if(n[i] & (1 << bit)) { ec_add(&res_others, point, res); - copy_memory(res_others.x, res->x); - copy_memory(res_others.y, res->y); + gf_copy(res_others.x, res->x); + gf_copy(res_others.y, res->y); } } } - copy_memory(res->x, res_others.x); - copy_memory(res->y, res_others.y); + gf_copy(res->x, res_others.x); + gf_copy(res->y, res_others.y); free(res_others.x); free(res_others.y); return 0; @@ -709,7 +753,7 @@ static int ec_mult(uint32_t *n, ec_point_t *point, ec_point_t *res) static void xor_with_point(uint8_t *buf, ec_point_t *point) { /* - int sz = g_decode_A_info.nr_bytes2 - 1; + int sz = g_ec_info.nr_bytes - 1; if(sz <= 32) { for(int i = 0; i < sz; i++) @@ -723,39 +767,40 @@ static void xor_with_point(uint8_t *buf, ec_point_t *point) */ uint8_t *ptrA = (uint8_t *)point->x; uint8_t *ptrB = (uint8_t *)point->y; - int sz = MIN(g_decode_A_info.nr_bytes2 - 1, 32); + int sz = MIN(g_ec_info.nr_bytes - 1, 32); for(int i = 0; i < sz; i++) buf[i] ^= ptrA[i]; for(int i = sz; i < 32; i++) buf[i] ^= ptrB[i - sz]; } -static int crypto4(uint8_t *a1, ec_point_t *ptrs, uint32_t *a3) +// https://en.wikipedia.org/wiki/Integrated_Encryption_Scheme#Formal_description_of_ECIES +static int xor_with_shared_secret(uint8_t *buf, 
ec_point_t *pt_rG, uint32_t *private_key) { - ec_point_t ptrs_others; + ec_point_t shared_secret; - ptrs_others.x = malloc(g_decode_A_info.size); - ptrs_others.y = malloc(g_decode_A_info.size); - clear_memory(ptrs_others.x, g_decode_A_info.nr_dwords); - clear_memory(ptrs_others.y, g_decode_A_info.nr_dwords); - int ret = ec_mult(a3, ptrs, &ptrs_others); + shared_secret.x = malloc(g_ec_info.size); + shared_secret.y = malloc(g_ec_info.size); + gf_zero(shared_secret.x, g_ec_info.nr_dwords); + gf_zero(shared_secret.y, g_ec_info.nr_dwords); + int ret = ec_mult(private_key, pt_rG, &shared_secret); if(ret == 0) - xor_with_point(a1, &ptrs_others); - free(ptrs_others.x); - free(ptrs_others.y); + xor_with_point(buf, &shared_secret); + free(shared_secret.x); + free(shared_secret.y); return ret; } static int set_field_poly(uint32_t *field_poly, int field_sz) { - clear_memory(field_poly, g_decode_A_info.nr_dwords); - g_decode_A_info.field_bits = 0; + gf_zero(field_poly, g_ec_info.nr_dwords); + g_ec_info.field_bits = 0; if(field_sz == 4) { set_bit(0, field_poly); set_bit(74, field_poly); set_bit(233, field_poly); - g_decode_A_info.field_bits = 233; + g_ec_info.field_bits = 233; return 0; } else if (field_sz == 5) @@ -765,31 +810,31 @@ static int set_field_poly(uint32_t *field_poly, int field_sz) set_bit(6, field_poly); set_bit(7, field_poly); set_bit(163, field_poly); - g_decode_A_info.field_bits = 163; + g_ec_info.field_bits = 163; return 0; } else return 1; } -static int ec_init(ec_point_t *a1, char field_sz) +static int ec_init(ec_point_t *ec_G, char field_sz) { - int ret = set_field_poly(g_decode_A_info.field_poly, field_sz); + int ret = set_field_poly(g_ec_info.field_poly, field_sz); if(ret) return ret; if(field_sz == 4) { - copy_memory(a1->x, g_crypto_table); - copy_memory(a1->y, g_crypto_table2); - copy_memory(g_decode_A_info.ec_a, g_atj_ec233_a); - copy_memory(g_decode_A_info.ptr7, g_crypto_key6); + gf_copy(ec_G->x, g_sect233k1_G_x); + gf_copy(ec_G->y, g_sect233k1_G_y); + 
gf_copy(g_ec_info.ec_a, g_sect233k1_a); // zero + gf_copy(g_ec_info.ec_b, g_sect233k1_b); // never used return 0; } - else if(field_sz == 5 ) + else if(field_sz == 6 ) // yet to find even a single specimen { - copy_memory(a1->x, g_crypto_key3); - copy_memory(a1->y, g_crypto_key4); - copy_memory(g_decode_A_info.ec_a, g_atj_ec163_a); - copy_memory(g_decode_A_info.ptr7, g_crypto_key5); + gf_copy(ec_G->x, g_sect163r2_G_x); + gf_copy(ec_G->y, g_sect163r2_G_y); + gf_copy(g_ec_info.ec_a, g_sect163r2_a); + gf_copy(g_ec_info.ec_b, g_sect163r2_b); return 0; } else @@ -805,19 +850,21 @@ static void create_guid(void *uid, int bit_size) static int process_block_B(uint8_t block[512]) { + struct fwu_block_B_hdr_t *p_hdr = (void *)block; + cprintf(BLUE, "Block B\n"); - decode_block_B(block + 3, g_subblock_A + 4, 489); - cprintf_field(" Word: ", "%d ", *(uint16_t *)(block + 3)); - check_field(*(uint16_t *)(block + 3), 1, "Ok\n", "Mismatch\n"); + decode_block_B(block + 3, g_subblock_A.key_B, 492 - 3); + cprintf_field(" Word: ", "%d ", p_hdr->unk_1_b); + check_field(p_hdr->unk_1_b, 1, "Ok\n", "Mismatch\n"); int ret = check_block(block, block + 492, 492); cprintf(GREEN, " Check: "); check_field(ret, 0, "Pass\n", "Fail\n"); - g_decode_buffer3 = malloc(g_decode_A_info.size); - memset(g_decode_buffer3, 0, g_decode_A_info.size); - int offset = *(uint16_t *)(block + 13) + 16; - memcpy(g_decode_buffer3, &block[offset], g_decode_A_info.nr_bytes2); + g_private_key = malloc(g_ec_info.size); + memset(g_private_key, 0, g_ec_info.size); + int offset = sizeof *p_hdr + p_hdr->guid_filler_size + 1; + memcpy(g_private_key, &block[offset], g_ec_info.nr_bytes); return 0; } @@ -832,22 +879,22 @@ static int get_key_fwu_v3(size_t size, uint8_t *buf, uint8_t *blockA, uint8_t *b memset(smallblock, 0, sizeof(smallblock)); memset(bigblock, 0, sizeof(bigblock)); - uint8_t ba = buf[0x1ee] & 0xf; - uint8_t bb = buf[0x1fe] & 0xf; + *blockA = buf[0x1ee] & 15; + *blockB = buf[0x1fe] & 15; + size_t offsetA = 512 * 
(1 + *blockA); + size_t offsetB = 512 * (1 + *blockB); cprintf(BLUE, "Crypto\n"); - cprintf_field(" Block A: ", "%d\n", ba + 2); - cprintf_field(" Block B: ", "%d\n", ba + bb + 5); + cprintf_field(" Block A: ", "0x%zx\n", 512 + offsetA); + cprintf_field(" Block B: ", "0x%zx\n", 512 + offsetA + 1024 + offsetB); - *blockA = buf[494] & 0xf; - *blockB = buf[510] & 0xf; - memcpy(bigblock, &buf[512 * (*blockA + 2)], sizeof(bigblock)); + memcpy(bigblock, &buf[512 + offsetA], sizeof(bigblock)); int ret = process_block_A(bigblock); if(ret != 0) return ret; - memcpy(smallblock, &buf[512 * (*blockA + *blockB + 5)], sizeof(smallblock)); + memcpy(smallblock, &buf[512 + offsetA + 1024 + offsetB], sizeof(smallblock)); ret = process_block_B(smallblock); if(ret != 0) return ret; @@ -859,64 +906,77 @@ static int get_key_fwu_v3(size_t size, uint8_t *buf, uint8_t *blockA, uint8_t *b cprintf_field(" Byte: ", "%d ", crypto_hdr.unk); check_field(crypto_hdr.unk, 3, "Ok\n", "Mismatch\n"); - ec_point_t ptrs; - ptrs.x = malloc(g_decode_A_info.size); - ptrs.y = malloc(g_decode_A_info.size); - memset(ptrs.x, 0, g_decode_A_info.size); - memset(ptrs.y, 0, g_decode_A_info.size); - memcpy(ptrs.x, buf + 91, g_decode_A_info.nr_bytes2); - memcpy(ptrs.y, buf + 91 + g_decode_A_info.nr_bytes2, g_decode_A_info.nr_bytes2); + size_t offset = sizeof(struct fwu_hdr_t) + sizeof(struct fwu_crypto_hdr_t); + ec_point_t pt_rG; + pt_rG.x = malloc(g_ec_info.size); + pt_rG.y = malloc(g_ec_info.size); + memset(pt_rG.x, 0, g_ec_info.size); + memset(pt_rG.y, 0, g_ec_info.size); + memcpy(pt_rG.x, buf + offset, g_ec_info.nr_bytes); + memcpy(pt_rG.y, buf + offset + g_ec_info.nr_bytes, g_ec_info.nr_bytes); - ret = ec_init(&g_decode_A_info.ptr1, g_crypto_info_byte); - cprintf(GREEN, " Crypto bits copy: "); + ret = ec_init(&g_ec_info.pt_G, g_field_sz_byte); + cprintf(GREEN, " Elliptic curve init: "); check_field(ret, 0, "Pass\n", "Fail\n"); - ret = crypto4(crypto_hdr.key, &ptrs, g_decode_buffer3); - cprintf(GREEN, " Crypto 
4: "); + ec_mult(g_private_key, &g_ec_info.pt_G, &g_ec_info.pt_kG); + cprintf(GREEN, " Public key check: "); + if (memcmp(g_public_key.x, g_ec_info.pt_kG.x, g_ec_info.nr_bytes) || + memcmp(g_public_key.y, g_ec_info.pt_kG.y, g_ec_info.nr_bytes)) + { + cprintf(RED, "Fail\n"); + return 1; + } + else + cprintf(RED, "Pass\n"); + + ret = xor_with_shared_secret(crypto_hdr.key, &pt_rG, g_private_key); + cprintf(GREEN, " ECIES decryption: "); check_field(ret, 0, "Pass\n", "Fail\n"); memcpy(keybuf, crypto_hdr.key, 32); - int offset = g_decode_A_info.nr_words + 91; + offset += g_ec_info.point_size; - decode_block_with_swap(keybuf, 0, &buf[offset], 512 - offset, g_perm_B); + rc4_key_swap_and_decode(keybuf, 0, &buf[offset], 512 - offset, g_rc4_S); int pos = *(uint16_t *)&buf[offset]; - cprintf_field(" Word: ", "%d ", pos); - int tmp = g_decode_A_info.nr_words2 + 199; + cprintf_field(" Filler size: ", "%d ", pos); + int tmp = offset + sizeof(struct fwu_sector0_tail_t); check_field(pos, 510 - tmp, "Ok\n", "Mismatch\n"); - uint8_t midbuf[108]; - memcpy(midbuf, &buf[pos + offset + 2], sizeof(midbuf)); + struct fwu_sector0_tail_t tail; + memcpy(&tail, &buf[offset + 2 + pos], sizeof(tail)); - cprintf_field(" Byte: ", "%d ", midbuf[0]); - check_field(midbuf[0], 2, "Ok\n", "Invalid\n"); - cprintf_field(" DWord: ", "%d ", *(uint32_t *)&midbuf[1]); - check_field(*(uint32_t *)&midbuf[1], 2056, "Ok\n", "Invalid\n"); - cprintf_field(" DWord: ", "%d ", *(uint32_t *)&midbuf[5]); - check_field(*(uint32_t *)&midbuf[5], 8, "Ok\n", "Invalid\n"); - cprintf_field(" Byte: ", "%d ", midbuf[41]); - check_field(midbuf[41], 190, "Ok\n", "Invalid\n"); + cprintf_field(" Byte: ", "%d ", tail.unk_2); + check_field(tail.unk_2, 2, "Ok\n", "Invalid\n"); + cprintf_field(" DWord: ", "0x%x ", tail.unk_x808); + check_field(tail.unk_x808, 0x808, "Ok\n", "Invalid\n"); + cprintf_field(" DWord: ", "%d ", tail.unk_8); + check_field(tail.unk_8, 8, "Ok\n", "Invalid\n"); + cprintf_field(" Byte: ", "%d ", tail.unk_190); + 
check_field(tail.unk_190, 190, "Ok\n", "Invalid\n"); + /* encode super secret at random position in guid stream, never used */ memset(blo, 0, 512); - create_guid(smallblock, 3808); - memcpy(smallblock + 476, midbuf + 42, 16); + create_guid(smallblock, 476 * 8); + memcpy(smallblock + 476, tail.super_secret_xor, 16); compute_checksum(smallblock, 492, blo + 492); int bsz = blo[500]; memcpy(blo, smallblock, bsz); - memcpy(blo + bsz, midbuf + 42, 16); + memcpy(blo + bsz, tail.super_secret_xor, 16); memcpy(blo + bsz + 16, smallblock + bsz, 476 - bsz); + rc4_cipher_block(blo + 492, 16, blo, 492, g_rc4_S); - decode_block_with_perm(blo + 492, 16, blo, 492, g_perm_B); - ret = check_block(buf + 42, midbuf + 88, 450); - cprintf(GREEN, " Decode block: "); + ret = check_block(buf + sizeof(struct fwu_hdr_t), tail.check, 492 - sizeof(struct fwu_hdr_t)); + cprintf(GREEN, " Check: "); check_field(ret, 0, "Pass\n", "Fail\n"); - ret = memcmp(g_subblock_A + 4, midbuf + 9, 16); + ret = memcmp(g_subblock_A.key_B, tail.key_B, 16); cprintf(GREEN, " Compare: "); check_field(ret, 0, "Pass\n", "Fail\n"); /* - ret = memcmp(midbuf + 25, zero, sizeof(zero)); + ret = memcmp(tail.guid, zero, sizeof(zero)); cprintf(GREEN, " Sanity: "); check_field(ret, 0, "Pass\n", "Fail\n"); */ @@ -1000,15 +1060,15 @@ static int decrypt_fwu_v3(uint8_t *buf, size_t *size, uint8_t block[512], enum f /* the input buffer is reorganized based on two offsets (blockA and blockB), * skip 2048 bytes of data used for crypto init */ *size = hdr->fw_size; /* use firmware size, not file size */ - *size -= 2048; + *size -= 512 + 1024 + 512; /* sector0 + blockA + blockB */ uint8_t *tmpbuf = malloc(*size); memset(tmpbuf, 0, *size); - int offsetA = (blockA + 1) << 9; - int offsetB = (blockB + 1) << 9; + int offsetA = 512 * (1 + blockA); + int offsetB = 512 * (1 + blockB); memcpy(tmpbuf, buf + 512, offsetA); - memcpy(tmpbuf + offsetA, buf + offsetA + 1536, offsetB); + memcpy(tmpbuf + offsetA, buf + 512 + offsetA + 1024, offsetB); 
memcpy(tmpbuf + offsetA + offsetB, - buf + offsetA + 1536 + offsetB + 512, *size - offsetA - offsetB); + buf + 512 + offsetA + 1024 + offsetB + 512, *size - offsetA - offsetB); /* stolen from https://github.com/nfd/atj2127decrypt, I have no idea from where * he got this sequence of code. This code is really weird, I copy verbatim * his authors comment below. @@ -1048,8 +1108,8 @@ static int decrypt_fwu_v3(uint8_t *buf, size_t *size, uint8_t block[512], enum f atj2127_decrypt(buf, tmpbuf, *size, keybuf, rounds_to_perform); else { - compute_perm(keybuf, 32, g_perm_B); - decode_perm(tmpbuf, *size, g_perm_B); + rc4_key_schedule(keybuf, 32, g_rc4_S); + rc4_stream_cipher(tmpbuf, *size, g_rc4_S); memcpy(buf, tmpbuf, *size); } @@ -1102,7 +1162,7 @@ int fwu_decrypt(uint8_t *buf, size_t *size, enum fwu_mode_t mode) cprintf_field(" Block size: ", "%d ", hdr->block_size); check_field(hdr->block_size, FWU_BLOCK_SIZE, "Ok\n", "Invalid\n"); - cprintf_field(" Version: ", "%x ", hdr->version); + cprintf_field(" Version: ", "0x%x ", hdr->version); int ver = get_version(buf, *size); if(ver < 0) {