From 60d420938372477226184fb9012de7f6b4ea2d83 Mon Sep 17 00:00:00 2001
From: Andrew Mahone <andrew.mahone@gmail.com>
Date: Fri, 1 May 2009 23:24:23 +0000
Subject: [PATCH] Add core JPEG reader, adapted from the JPEG plugin's decoder,
 with some changes to prevent include conflicts between the two decoders.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@20836 a1c6a512-1295-4272-9138-f99709370657
---
 apps/SOURCES                     |    1 +
 apps/plugin.c                    |    3 +
 apps/plugin.h                    |   10 +-
 apps/plugins/jpeg/jpeg_decoder.h |   68 +-
 apps/plugins/test_core_jpeg.c    |   89 ++
 apps/plugins/viewers.config      |    3 +
 apps/recorder/jpeg_common.h      |   97 ++
 apps/recorder/jpeg_load.c        | 2012 ++++++++++++++++++++++++++++++
 apps/recorder/jpeg_load.h        |   47 +
 9 files changed, 2262 insertions(+), 68 deletions(-)
 create mode 100644 apps/plugins/test_core_jpeg.c
 create mode 100644 apps/recorder/jpeg_common.h
 create mode 100644 apps/recorder/jpeg_load.c
 create mode 100644 apps/recorder/jpeg_load.h

diff --git a/apps/SOURCES b/apps/SOURCES
index 6b60365694..13ca913a39 100644
--- a/apps/SOURCES
+++ b/apps/SOURCES
@@ -89,6 +89,7 @@ gui/viewport.c
 #if (LCD_DEPTH > 1) || (defined(HAVE_REMOTE_LCD) && (LCD_REMOTE_DEPTH > 1))
 gui/backdrop.c
 recorder/resize.c
+recorder/jpeg_load.c
 #endif
 
 #ifdef HAVE_LCD_CHARCELLS
diff --git a/apps/plugin.c b/apps/plugin.c
index d1e9a7949b..7ebb2aa12f 100644
--- a/apps/plugin.c
+++ b/apps/plugin.c
@@ -643,6 +643,9 @@ static const struct plugin_api rockbox_api = {
 	lcd_pal256_update_pal,
 #endif
 #endif
+#if defined(HAVE_LCD_BITMAP) && LCD_DEPTH > 1
+    read_jpeg_file,
+#endif
 };
 
 int plugin_load(const char* plugin, const void* parameter)
diff --git a/apps/plugin.h b/apps/plugin.h
index d38cc42ee7..ab570d473e 100644
--- a/apps/plugin.h
+++ b/apps/plugin.h
@@ -79,6 +79,9 @@ void* plugin_get_buffer(size_t *buffer_size);
 #ifdef HAVE_LCD_BITMAP
 #include "screendump.h"
 #include "scrollbar.h"
+#if LCD_DEPTH > 1
+#include "jpeg_load.h"
+#endif
 #include "../recorder/bmp.h"
 #endif
 #include "statusbar.h"
@@ -128,7 +131,7 @@ void* plugin_get_buffer(size_t *buffer_size);
 #define PLUGIN_MAGIC 0x526F634B /* RocK */
 
 /* increase this every time the api struct changes */
-#define PLUGIN_API_VERSION 147
+#define PLUGIN_API_VERSION 148
 
 /* update this to latest version if a change to the api struct breaks
    backwards compatibility (and please take the opportunity to sort in any
@@ -802,6 +805,11 @@ struct plugin_api {
 	void (*lcd_pal256_update_pal)(fb_data *palette);
 #endif
 #endif
+
+#if defined(HAVE_LCD_BITMAP) && LCD_DEPTH > 1
+    int (*read_jpeg_file)(const char* filename, struct bitmap *bm, int maxsize,
+                          int format, const struct custom_format *cformat);
+#endif
 };
 
 /* plugin header */
diff --git a/apps/plugins/jpeg/jpeg_decoder.h b/apps/plugins/jpeg/jpeg_decoder.h
index f4dbeaa147..b86bdaf001 100644
--- a/apps/plugins/jpeg/jpeg_decoder.h
+++ b/apps/plugins/jpeg/jpeg_decoder.h
@@ -27,62 +27,7 @@
 
 #ifndef _JPEG_JPEG_DECODER_H
 #define _JPEG_JPEG_DECODER_H
-
-#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */
-
-struct derived_tbl
-{
-    /* Basic tables: (element [0] of each array is unused) */
-    long mincode[17]; /* smallest code of length k */
-    long maxcode[18]; /* largest code of length k (-1 if none) */
-    /* (maxcode[17] is a sentinel to ensure huff_DECODE terminates) */
-    int valptr[17]; /* huffval[] index of 1st symbol of length k */
-
-    /* Back link to public Huffman table (needed only in slow_DECODE) */
-    int* pub;
-
-    /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
-    the input data stream.  If the next Huffman code is no more
-    than HUFF_LOOKAHEAD bits long, we can obtain its length and
-    the corresponding symbol directly from these tables. */
-    int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-    unsigned char look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
-};
-
-#define QUANT_TABLE_LENGTH  64
-
-/* for type of Huffman table */
-#define DC_LEN 28
-#define AC_LEN 178
-
-struct huffman_table
-{   /* length and code according to JFIF format */
-    int huffmancodes_dc[DC_LEN];
-    int huffmancodes_ac[AC_LEN];
-};
-
-struct frame_component
-{
-    int ID;
-    int horizontal_sampling;
-    int vertical_sampling;
-    int quanttable_select;
-};
-
-struct scan_component
-{
-    int ID;
-    int DC_select;
-    int AC_select;
-};
-
-struct bitstream
-{
-    unsigned long get_buffer; /* current bit-extraction buffer */
-    int bits_left; /* # of unused bits in it */
-    unsigned char* next_input_byte;
-    unsigned char* input_end; /* upper limit +1 */
-};
+#include "jpeg_common.h"
 
 struct jpeg
 {
@@ -113,17 +58,6 @@ struct jpeg
     int subsample_y[3];
 };
 
-
-/* possible return flags for process_markers() */
-#define HUFFTAB   0x0001 /* with huffman table */
-#define QUANTTAB  0x0002 /* with quantization table */
-#define APP0_JFIF 0x0004 /* with APP0 segment following JFIF standard */
-#define FILL_FF   0x0008 /* with 0xFF padding bytes at begin/end */
-#define SOF0      0x0010 /* with SOF0-Segment */
-#define DHT       0x0020 /* with Definition of huffman tables */
-#define SOS       0x0040 /* with Start-of-Scan segment */
-#define DQT       0x0080 /* with definition of quantization table */
-
 /* various helper functions */
 void default_huff_tbl(struct jpeg* p_jpeg);
 void build_lut(struct jpeg* p_jpeg);
diff --git a/apps/plugins/test_core_jpeg.c b/apps/plugins/test_core_jpeg.c
new file mode 100644
index 0000000000..5df69b5792
--- /dev/null
+++ b/apps/plugins/test_core_jpeg.c
@@ -0,0 +1,89 @@
+/*****************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _// __ \_/ ___\|  |/ /| __ \ / __ \  \/  /
+ *   Jukebox    |    |   ( (__) )  \___|    ( | \_\ ( (__) )    (
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 Andrew Mahone
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "plugin.h"
+#include "lib/grey.h"
+PLUGIN_HEADER
+
+/* drawing backend: greylib below 8 bpp, the native LCD API otherwise */
+#if LCD_DEPTH < 8
+#define USEGSLIB
+GREY_INFO_STRUCT
+#define MYLCD(fn) grey_ub_ ## fn
+#define MYLCD_UPDATE() /* expands to nothing; the call site adds the ';' */
+#define MYXLCD(fn) grey_ub_ ## fn
+#define CFORMAT &format_grey
+#else
+#define MYLCD(fn) rb->lcd_ ## fn
+#define MYLCD_UPDATE() rb->lcd_update() /* no ';': the call site adds it */
+#define MYXLCD(fn) xlcd_ ## fn
+#define CFORMAT NULL
+#endif
+
+/* plugin entry point: load the JPEG named by parameter, show it centred */
+enum plugin_status plugin_start(const void* parameter)
+{
+    size_t plugin_buf_len;
+    unsigned char * plugin_buf =
+        (unsigned char *)rb->plugin_get_buffer(&plugin_buf_len);
+    static char filename[MAX_PATH];
+    struct bitmap bm = { /* start from full-screen dimensions */
+        .width = LCD_WIDTH,
+        .height = LCD_HEIGHT,
+    };
+    int ret;
+
+    if(!parameter) return PLUGIN_ERROR;
+
+    rb->strcpy(filename, parameter); /* NOTE(review): not length-checked */
+
+#ifdef USEGSLIB
+    long greysize;
+    if (!grey_init(plugin_buf, plugin_buf_len, GREY_ON_COP,
+                   LCD_WIDTH, LCD_HEIGHT, &greysize))
+    {
+        rb->splash(HZ, "grey buf error");
+        return PLUGIN_ERROR;
+    }
+    plugin_buf += greysize; /* presumably the bytes greylib took; confirm */
+    plugin_buf_len -= greysize;
+#endif
+    bm.data = plugin_buf; /* decoder output goes into the remaining buffer */
+    ret = rb->read_jpeg_file(filename, &bm, plugin_buf_len,
+                   FORMAT_NATIVE|FORMAT_RESIZE|FORMAT_KEEP_ASPECT,
+                   CFORMAT);
+    if (ret < 1) /* any result below 1 is treated as a failed load */
+        return PLUGIN_ERROR;
+#ifdef USEGSLIB
+    grey_show(true);
+    grey_ub_gray_bitmap((fb_data *)bm.data, (LCD_WIDTH - bm.width) >> 1,
+        (LCD_HEIGHT - bm.height) >> 1, bm.width, bm.height);
+#else
+    rb->lcd_bitmap((fb_data *)bm.data, (LCD_WIDTH - bm.width) >> 1,
+        (LCD_HEIGHT - bm.height) >> 1, bm.width, bm.height);
+#endif
+    MYLCD_UPDATE();
+    /* keep polling (yielding in between) until ACTION_STD_OK arrives */
+    while (rb->get_action(CONTEXT_STD,1) != ACTION_STD_OK) rb->yield();
+#ifdef USEGSLIB
+    grey_release();
+#endif
+    return PLUGIN_OK;
+}
diff --git a/apps/plugins/viewers.config b/apps/plugins/viewers.config
index ff77dd85b6..09d0455c22 100644
--- a/apps/plugins/viewers.config
+++ b/apps/plugins/viewers.config
@@ -27,6 +27,9 @@ wav,viewers/wavplay,9
 wav,viewers/wavview,10
 wav,viewers/test_codec,-
 bmp,viewers/test_greylib_bitmap_scale,-
+jpeg,viewers/test_core_jpeg,-
+jpe,viewers/test_core_jpeg,-
+jpg,viewers/test_core_jpeg,-
 bmp,apps/rockpaint,11
 bmp,games/sliding_puzzle,11
 mpg,viewers/mpegplayer,4
diff --git a/apps/recorder/jpeg_common.h b/apps/recorder/jpeg_common.h
new file mode 100644
index 0000000000..44bf81e435
--- /dev/null
+++ b/apps/recorder/jpeg_common.h
@@ -0,0 +1,97 @@
+/***************************************************************************
+*             __________               __   ___.
+*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+*                     \/            \/     \/    \/            \/
+* $Id$
+*
+* JPEG image viewer
+* Common structs and defines for plugin and core JPEG decoders
+*
+* File scrolling addition (C) 2005 Alexander Spyridakis
+* Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
+* Heavily borrowed from the IJG implementation (C) Thomas G. Lane
+* Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+* KIND, either express or implied.
+*
+****************************************************************************/
+
+#ifndef _JPEG_COMMON_H
+#define _JPEG_COMMON_H
+
+#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */
+#define JPEG_READ_BUF_SIZE 16
+struct derived_tbl /* per-table Huffman decoding look-up data */
+{
+    /* Basic tables: (element [0] of each array is unused) */
+    long mincode[17]; /* smallest code of length k */
+    long maxcode[18]; /* largest code of length k (-1 if none) */
+    /* (maxcode[17] is a sentinel to ensure huff_DECODE terminates) */
+    int valptr[17]; /* huffval[] index of 1st symbol of length k */
+
+    /* Back link to public Huffman table (needed only in slow_DECODE) */
+    int* pub;
+
+    /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+     * the input data stream.  If the next Huffman code is no more
+     * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+     * the corresponding symbol directly from these tables. */
+    int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
+    unsigned char look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+};
+
+#define QUANT_TABLE_LENGTH  64
+
+/* for type of Huffman table */
+#define DC_LEN 28
+#define AC_LEN 178
+
+struct huffman_table /* one DC table and one AC table, stored as ints */
+{   /* length and code according to JFIF format */
+    int huffmancodes_dc[DC_LEN]; /* presumably 16 counts + 12 symbols */
+    int huffmancodes_ac[AC_LEN]; /* presumably 16 counts + 162 symbols */
+};
+
+struct frame_component /* one image component of the frame */
+{
+    int ID; /* component identifier */
+    int horizontal_sampling; /* presumably the SOF sampling factor; confirm */
+    int vertical_sampling; /* same, vertical direction */
+    int quanttable_select; /* presumably a quantization table index; confirm */
+};
+
+struct scan_component /* one image component of a scan */
+{
+    int ID; /* component identifier */
+    int DC_select; /* presumably the DC Huffman table index; confirm */
+    int AC_select; /* presumably the AC Huffman table index; confirm */
+};
+
+struct bitstream /* bit-extraction state over an in-memory input range */
+{
+    unsigned long get_buffer; /* current bit-extraction buffer */
+    int bits_left; /* # of unused bits in it */
+    unsigned char* next_input_byte; /* next byte to be consumed */
+    unsigned char* input_end; /* upper limit +1 */
+};
+
+/* possible return flags for process_markers() */
+#define HUFFTAB   0x0001 /* with huffman table */
+#define QUANTTAB  0x0002 /* with quantization table */
+#define APP0_JFIF 0x0004 /* with APP0 segment following JFIF standard */
+#define FILL_FF   0x0008 /* with 0xFF padding bytes at begin/end */
+#define SOF0      0x0010 /* with SOF0-Segment */
+#define DHT       0x0020 /* with Definition of huffman tables */
+#define SOS       0x0040 /* with Start-of-Scan segment */
+#define DQT       0x0080 /* with definition of quantization table */
+
+#endif /* _JPEG_COMMON_H */
diff --git a/apps/recorder/jpeg_load.c b/apps/recorder/jpeg_load.c
new file mode 100644
index 0000000000..d05671cb09
--- /dev/null
+++ b/apps/recorder/jpeg_load.c
@@ -0,0 +1,2012 @@
+/***************************************************************************
+*             __________               __   ___.
+*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+*                     \/            \/     \/    \/            \/
+* $Id$
+*
+* JPEG image viewer
+* (This is a real mess if it has to be coded in one single C file)
+*
+* Copyright (C) 2009 Andrew Mahone fractional decode, split IDCT - 16-point
+*   IDCT based on IJG jpeg-7 pre-release
+* File scrolling addition (C) 2005 Alexander Spyridakis
+* Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
+* Heavily borrowed from the IJG implementation (C) Thomas G. Lane
+* Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+* KIND, either express or implied.
+*
+****************************************************************************/
+
+#include "plugin.h"
+#include "debug.h"
+#include "jpeg_load.h"
+/*#define JPEG_BS_DEBUG*/
+/* for portability of below JPEG code; ENDIAN_SWAP16 is an identity here */
+#define MEMSET(p,v,c) memset(p,v,c)
+#define MEMCPY(d,s,c) memcpy(d,s,c)
+#define INLINE static inline
+#define ENDIAN_SWAP16(n) (n) /* only for poor little endian machines */
+
+/**************** begin JPEG code ********************/
+
+#ifdef HAVE_LCD_COLOR
+typedef struct uint8_rgb jpeg_pix_t;
+#else
+typedef uint8_t jpeg_pix_t;
+#endif
+#define JPEG_PIX_SZ (sizeof(jpeg_pix_t))
+
+/* Decoder state. This can't live in jpeg_load.h: plugin.h includes that header
+ * and the plugin decoder's jpeg_decoder.h defines its own struct jpeg.
+ */
+struct jpeg
+{
+    int fd; /* presumably the input file descriptor; confirm */
+    int buf_left; /* presumably bytes still unread in buf[]; confirm */
+    unsigned char *buf_index; /* presumably the read position in buf[] */
+    unsigned long int bitbuf;
+    int bitbuf_bits;
+    int marker_ind;
+    int marker_val;
+    unsigned char marker;
+    int x_size, y_size; /* size of image (can be less than block boundary) */
+    int x_phys, y_phys; /* physical size, block aligned */
+    int x_mbl; /* x dimension of MBL */
+    int y_mbl; /* y dimension of MBL */
+    int blocks; /* blocks per MB */
+    int restart_interval; /* number of MCUs between RSTm markers */
+    int restart; /* blocks until next restart marker */
+    int mcu_row; /* current row relative to first row of this row of MCUs */
+    unsigned char *out_ptr; /* pointer to current row to output */
+    int cur_row; /* current row relative to top of image */
+    int set_rows;
+    int store_pos[4]; /* for Y block ordering */
+#ifdef HAVE_LCD_COLOR
+    int last_dc_val[3]; /* three entries on colour targets */
+#else
+    int last_dc_val; /* single entry on non-colour targets */
+#endif
+    int h_scale[2]; /* horizontal scalefactor = (2**N) / 8 */
+    int v_scale[2]; /* same as above, for vertical direction */
+    int k_need[3]; /* per component zig-zag index of last needed coefficient */
+    int zero_need[3]; /* per component number of coefficients to zero */
+    jpeg_pix_t *img_buf;
+
+    int quanttable[4][QUANT_TABLE_LENGTH]; /* raw quantization tables 0-3 */
+
+    struct huffman_table hufftable[2]; /* Huffman tables  */
+    struct derived_tbl dc_derived_tbls[2]; /* Huffman-LUTs */
+    struct derived_tbl ac_derived_tbls[2]; /* same, for the AC tables */
+
+    struct frame_component frameheader[3]; /* Component descriptor */
+    struct scan_component scanheader[3]; /* currently not used */
+
+    int mcu_membership[6]; /* info per block */
+    int tab_membership[6];
+    int subsample_x[3]; /* info per component */
+    int subsample_y[3];
+    unsigned char buf[JPEG_READ_BUF_SIZE]; /* JPEG_READ_BUF_SIZE bytes */
+    struct img_part part;
+};
+
+INLINE unsigned range_limit(int value) /* low byte = clamp(value+128, 0..255) */
+{
+#if CONFIG_CPU == SH7034
+    unsigned tmp;
+    asm (  /* Note: Uses knowledge that only low byte of result is used */
+        "mov     #-128,%[t]  \n"
+        "sub     %[t],%[v]   \n"  /* value -= -128; equals value += 128; */
+        "extu.b  %[v],%[t]   \n"
+        "cmp/eq  %[v],%[t]   \n"  /* low byte == whole number ? */
+        "bt      1f          \n"  /* yes: no overflow */
+        "cmp/pz  %[v]        \n"  /* overflow: positive? */
+        "subc    %[v],%[v]   \n"  /* %[v] now either 0 or 0xffffffff */
+    "1:                      \n"
+        : /* outputs */
+        [v]"+r"(value),
+        [t]"=&r"(tmp)
+    );
+    return value;
+#elif defined(CPU_COLDFIRE)
+    /* Note: Uses knowledge that only the low byte of the result is used */
+    asm (
+        "add.l   #128,%[v]   \n"  /* value += 128; */
+        "cmp.l   #255,%[v]   \n"  /* overflow? */
+        "bls.b   1f          \n"  /* no: return value */
+        /* yes: set low byte to appropriate boundary */
+        "spl.b   %[v]        \n"
+    "1:                      \n"
+        : /* outputs */
+        [v]"+d"(value)
+    );
+    return value;
+#elif defined(CPU_ARM)
+    /* Note: Uses knowledge that only the low byte of the result is used */
+    asm (
+        "add     %[v], %[v], #128    \n"  /* value += 128 */
+        "cmp     %[v], #255          \n"  /* out of range 0..255? */
+        "mvnhi   %[v], %[v], asr #31 \n"  /* yes: set all bits to ~(sign_bit) */
+        : /* outputs */
+        [v]"+r"(value)
+    );
+    return value;
+#else
+    value += 128; /* portable fallback: returns exactly 0..255 */
+
+    if ((unsigned)value <= 255) /* one unsigned compare covers 0..255 */
+        return value;
+
+    if (value < 0)
+        return 0;
+
+    return 255;
+#endif
+}
+
+static inline int clamp_component(int x) /* saturate x to 0..255 */
+{
+    if (x < 0)
+        return 0;
+    return x > 255 ? 255 : x;
+}
+
+/* IDCT implementation */
+
+
+#define CONST_BITS 13
+#define PASS1_BITS 2
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+* causing a lot of useless floating-point operations at run time.
+* To get around this we use the following pre-calculated constants.
+* If you change CONST_BITS you may want to add appropriate values.
+* (With a reasonable C compiler, you can just rely on the FIX() macro...)
+*/
+#define FIX_0_298631336  2446 /* FIX(0.298631336) */
+#define FIX_0_390180644  3196 /* FIX(0.390180644) */
+#define FIX_0_541196100  4433 /* FIX(0.541196100) */
+#define FIX_0_765366865  6270 /* FIX(0.765366865) */
+#define FIX_0_899976223  7373 /* FIX(0.899976223) */
+#define FIX_1_175875602  9633 /* FIX(1.175875602) */
+#define FIX_1_501321110 12299 /* FIX(1.501321110) */
+#define FIX_1_847759065 15137 /* FIX(1.847759065) */
+#define FIX_1_961570560 16069 /* FIX(1.961570560) */
+#define FIX_2_053119869 16819 /* FIX(2.053119869) */
+#define FIX_2_562915447 20995 /* FIX(2.562915447) */
+#define FIX_3_072711026 25172 /* FIX(3.072711026) */
+
+
+
+/* Multiply a long variable by a long constant to yield a long result.
+* For 8-bit samples with the recommended scaling, all the variable
+* and constant values involved are no more than 16 bits wide, so a
+* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+* For 12-bit samples, a full 32-bit multiplication will be needed.
+*/
+#define MULTIPLY16(var,const)  (((short) (var)) * ((short) (const)))
+
+#define MULTIPLY(var1, var2) ((var1) * (var2))
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+#define ONE ((long)1)
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+#define FIX(x) ((long) ((x) * CONST_SCALE + 0.5))
+#define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
+
+/* Descale and correctly round an int value that's scaled by N bits.
+* We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+* the fudge factor is correct for either sign of X.
+*/
+#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
+
+#define DS_OUT ((CONST_BITS)+(PASS1_BITS)+3)
+
+/*
+ * Conversion of full 0-255 range YCrCb to RGB:
+ *   |R|   |1.000000 -0.000001  1.402000| |Y'|
+ *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ *   |B|   |1.000000  1.772000  0.000000| |Pr|
+ * Scaled (yields s15-bit output):
+ *   |R|   |128    0  179| |Y       |
+ *   |G| = |128  -43  -91| |Cb - 128|
+ *   |B|   |128  227    0| |Cr - 128|
+ */
+#define YFAC            128
+#define RVFAC           179
+#define GUFAC           (-43)
+#define GVFAC           (-91)
+#define BUFAC           227
+#define COMPONENT_SHIFT  15
+
+/* horizontal-pass 1-point IDCT: one range-limited sample per workspace row */
+static void idct1h(int *ws, unsigned char *out, int rows, int rowstep)
+{
+    int left;
+    /* each pass consumes one 8-int workspace row and one output row */
+    for (left = rows; left > 0; left--, ws += 8, out += rowstep)
+    {
+        const int dc = (int) DESCALE(*ws, DS_OUT);
+        *out = range_limit(dc);
+    }
+}
+
+/* vertical-pass 2-point IDCT: in-place sum/difference of rows 0 and 1 */
+static void idct2v(int *ws, int cols)
+{
+    int col;
+    /* column col of row 0 is ws[col]; the same column of row 1 is 8 ints on */
+    for (col = 0; col < cols; col++)
+    {
+        const int upper = ws[col];
+        const int lower = ws[col + 8];
+        ws[col] = upper + lower;
+        ws[col + 8] = upper - lower;
+    }
+}
+
+/* horizontal-pass 2-point IDCT: two range-limited samples per workspace row */
+static void idct2h(int *ws, unsigned char *out, int rows, int rowstep)
+{
+    int left;
+    for (left = rows; left > 0; left--, out += rowstep, ws += 8)
+    {
+        /* fold the rounding term for the final shift into the even input */
+        const int even = ws[0] + (ONE << (DS_OUT - 1));
+        const int odd = ws[1];
+        const int first = (int) RIGHT_SHIFT(even + odd, DS_OUT);
+        const int second = (int) RIGHT_SHIFT(even - odd, DS_OUT);
+
+        out[JPEG_PIX_SZ*0] = range_limit(first);
+        out[JPEG_PIX_SZ*1] = range_limit(second);
+    }
+}
+
+/* vertical-pass 4-point IDCT: transforms cols workspace columns in place */
+static void idct4v(int *ws, int cols)
+{
+    int tmp0, tmp2, tmp10, tmp12;
+    int z1, z2, z3;
+    int col;
+    for (col = 0; col < cols; col++, ws++)
+    {
+        /* Even part: rows 0 and 2, scaled up by PASS1_BITS */
+
+        tmp0 = ws[8*0];
+        tmp2 = ws[8*2];
+
+        tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+        tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+        /* Odd part: rows 1 and 3; z1 also carries the rounding term */
+        /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+        z2 = ws[8*1];
+        z3 = ws[8*3];
+
+        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100) +
+            (ONE << (CONST_BITS - PASS1_BITS - 1));
+        tmp0 = RIGHT_SHIFT(z1 + MULTIPLY16(z3, - FIX_1_847759065),
+            CONST_BITS-PASS1_BITS);
+        tmp2 = RIGHT_SHIFT(z1 + MULTIPLY16(z2, FIX_0_765366865),
+            CONST_BITS-PASS1_BITS);
+
+        /* Final output stage: results overwrite rows 0..3 of this column */
+
+        ws[8*0] = (int) (tmp10 + tmp2);
+        ws[8*3] = (int) (tmp10 - tmp2);
+        ws[8*1] = (int) (tmp12 + tmp0);
+        ws[8*2] = (int) (tmp12 - tmp0);
+    }
+}
+
+/* horizontal-pass 4-point IDCT: four range-limited samples per workspace row */
+static void idct4h(int *ws, unsigned char *out, int rows, int rowstep)
+{
+    int tmp0, tmp2, tmp10, tmp12;
+    int z1, z2, z3;
+    int row;
+    for (row = 0; row < rows; row++, out += rowstep, ws += 8)
+    {
+        /* Even part: rounding for the DS_OUT shift is folded into ws[0] */
+
+        tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2));
+        tmp2 = (int) ws[2];
+
+        tmp10 = (tmp0 + tmp2) << CONST_BITS;
+        tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+        /* Odd part: MULTIPLY16 truncates both operands to short */
+        /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+        z2 = (int) ws[1];
+        z3 = (int) ws[3];
+
+        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
+        tmp0 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
+        tmp2 = z1 + MULTIPLY16(z2, FIX_0_765366865);
+
+        /* Final output stage: samples are JPEG_PIX_SZ bytes apart in out */
+
+        out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp10 + tmp2,
+            DS_OUT));
+        out[JPEG_PIX_SZ*3] = range_limit((int) RIGHT_SHIFT(tmp10 - tmp2,
+            DS_OUT));
+        out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp12 + tmp0,
+            DS_OUT));
+        out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp0,
+            DS_OUT));
+    }
+}
+
+/* vertical-pass 8-point IDCT: transforms cols workspace columns in place */
+static void idct8v(int *ws, int cols)
+{
+    long tmp0, tmp1, tmp2, tmp3;
+    long tmp10, tmp11, tmp12, tmp13;
+    long z1, z2, z3, z4, z5;
+    int col;
+    for (col = 0; col < cols; col++, ws++)
+    {
+        /* Due to quantization, we will usually find that many of the input
+         * coefficients are zero, especially the AC terms.  We can exploit
+         * this by short-circuiting the IDCT calculation for any column in
+         * which all the AC terms are zero.  In that case each output is
+         * equal to the DC coefficient (with scale factor as needed).
+         * With typical images and quantization tables, half or more of the
+         * column DCT calculations can be simplified this way.
+         */
+        if ((ws[8*1] | ws[8*2] | ws[8*3]
+           | ws[8*4] | ws[8*5] | ws[8*6] | ws[8*7]) == 0)
+        {
+            /* AC terms all zero: every row gets the scaled DC value */
+            int dcval = ws[8*0] << PASS1_BITS;
+
+            ws[8*0] = ws[8*1] = ws[8*2] = ws[8*3] = ws[8*4]
+                       = ws[8*5] = ws[8*6] = ws[8*7] = dcval;
+            continue;
+        }
+
+        /* Even part: reverse the even part of the forward DCT. */
+        /* The rotator is sqrt(2)*c(-6). */
+
+        z2 = ws[8*2];
+        z3 = ws[8*6];
+
+        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
+        tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
+        tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
+
+        z2 = ws[8*0] << CONST_BITS;
+        z2 += ONE << (CONST_BITS - PASS1_BITS - 1); /* rounding for the shift */
+        z3 = ws[8*4] << CONST_BITS;
+
+        tmp0 = (z2 + z3);
+        tmp1 = (z2 - z3);
+
+        tmp10 = tmp0 + tmp3;
+        tmp13 = tmp0 - tmp3;
+        tmp11 = tmp1 + tmp2;
+        tmp12 = tmp1 - tmp2;
+
+        /* Odd part per figure 8; the matrix is unitary and hence its
+           transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively. */
+
+        tmp0 = ws[8*7];
+        tmp1 = ws[8*5];
+        tmp2 = ws[8*3];
+        tmp3 = ws[8*1];
+
+        z1 = tmp0 + tmp3;
+        z2 = tmp1 + tmp2;
+        z3 = tmp0 + tmp2;
+        z4 = tmp1 + tmp3;
+        z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+        tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+        tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+        tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+        tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+        z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+        z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+        z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+        z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+        z3 += z5;
+        z4 += z5;
+
+        tmp0 += z1 + z3;
+        tmp1 += z2 + z4;
+        tmp2 += z2 + z3;
+        tmp3 += z1 + z4;
+
+        /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+        ws[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+        ws[8*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+        ws[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+        ws[8*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+        ws[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+        ws[8*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+        ws[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+        ws[8*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    }
+}
+
+/* horizontal-pass 8-point IDCT: eight range-limited samples per workspace row */
+static void idct8h(int *ws, unsigned char *out, int rows, int rowstep)
+{
+    long tmp0, tmp1, tmp2, tmp3;
+    long tmp10, tmp11, tmp12, tmp13;
+    long z1, z2, z3, z4, z5;
+    int row;
+    for (row = 0; row < rows; row++, out += rowstep, ws += 8)
+    {
+        /* Rows of zeroes can be exploited in the same way as we did with
+         * columns. However, the column calculation has created many nonzero AC
+         * terms, so the simplification applies less often (typically 5% to 10%
+         * of the time). On machines with very fast multiplication, it's
+         * possible that the test takes more time than it's worth.  In that
+         * case the test can be compiled out by defining NO_ZERO_ROW_TEST.
+         */
+
+#ifndef NO_ZERO_ROW_TEST
+        if ((ws[1] | ws[2] | ws[3]
+           | ws[4] | ws[5] | ws[6] | ws[7]) == 0)
+        {
+            /* AC terms all zero: all eight samples share one value */
+            unsigned char dcval = range_limit((int) DESCALE((long) ws[0],
+                PASS1_BITS+3));
+
+            out[JPEG_PIX_SZ*0] = dcval;
+            out[JPEG_PIX_SZ*1] = dcval;
+            out[JPEG_PIX_SZ*2] = dcval;
+            out[JPEG_PIX_SZ*3] = dcval;
+            out[JPEG_PIX_SZ*4] = dcval;
+            out[JPEG_PIX_SZ*5] = dcval;
+            out[JPEG_PIX_SZ*6] = dcval;
+            out[JPEG_PIX_SZ*7] = dcval;
+            continue;
+        }
+#endif
+
+        /* Even part: reverse the even part of the forward DCT. */
+        /* The rotator is sqrt(2)*c(-6). */
+
+        z2 = (long) ws[2];
+        z3 = (long) ws[6];
+
+        z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
+        tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
+        tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
+
+        z4 = (long) ws[0] + (ONE << (PASS1_BITS + 2)); /* rounding for DS_OUT */
+        z4 <<= CONST_BITS;
+        z5 = (long) ws[4] << CONST_BITS;
+        tmp0 = z4 + z5;
+        tmp1 = z4 - z5;
+
+        tmp10 = tmp0 + tmp3;
+        tmp13 = tmp0 - tmp3;
+        tmp11 = tmp1 + tmp2;
+        tmp12 = tmp1 - tmp2;
+
+        /* Odd part per figure 8; the matrix is unitary and hence its
+        * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */
+
+        tmp0 = (long) ws[7];
+        tmp1 = (long) ws[5];
+        tmp2 = (long) ws[3];
+        tmp3 = (long) ws[1];
+
+        z1 = tmp0 + tmp3;
+        z2 = tmp1 + tmp2;
+        z3 = tmp0 + tmp2;
+        z4 = tmp1 + tmp3;
+        z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+        tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+        tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+        tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+        tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+        z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+        z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+        z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+        z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+        z3 += z5;
+        z4 += z5;
+
+        tmp0 += z1 + z3;
+        tmp1 += z2 + z4;
+        tmp2 += z2 + z3;
+        tmp3 += z1 + z4;
+
+        /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+        out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp10 + tmp3,
+            DS_OUT));
+        out[JPEG_PIX_SZ*7] = range_limit((int) RIGHT_SHIFT(tmp10 - tmp3,
+            DS_OUT));
+        out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp11 + tmp2,
+            DS_OUT));
+        out[JPEG_PIX_SZ*6] = range_limit((int) RIGHT_SHIFT(tmp11 - tmp2,
+            DS_OUT));
+        out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 + tmp1,
+            DS_OUT));
+        out[JPEG_PIX_SZ*5] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp1,
+            DS_OUT));
+        out[JPEG_PIX_SZ*3] = range_limit((int) RIGHT_SHIFT(tmp13 + tmp0,
+            DS_OUT));
+        out[JPEG_PIX_SZ*4] = range_limit((int) RIGHT_SHIFT(tmp13 - tmp0,
+            DS_OUT));
+    }
+}
+
+/* vertical-pass 16-point IDCT: per column, 8 inputs in, 16 outputs in place */
+static void idct16v(int *ws, int cols)
+{
+    long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+    long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+    long z1, z2, z3, z4;
+    int col;
+    for (col = 0; col < cols; col++, ws++) /* ws++ steps to the next column */
+    {
+        /* Even part: uses input rows 0, 4, 2 and 6 (row stride is 8 ints) */
+
+        tmp0 = ws[8*0] << CONST_BITS;
+        /* Add rounding bias: half of the final CONST_BITS-PASS1_BITS shift. */
+        tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
+
+        z1 = ws[8*4];
+        tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+        tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+        tmp10 = tmp0 + tmp1;
+        tmp11 = tmp0 - tmp1;
+        tmp12 = tmp0 + tmp2;
+        tmp13 = tmp0 - tmp2;
+
+        z1 = ws[8*2];
+        z2 = ws[8*6];
+        z3 = z1 - z2;
+        z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+        z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+        /* (c6+c2)[16] = (c3+c1)[8] */
+        tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);
+        /* (c6-c14)[16] = (c3-c7)[8] */
+        tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);
+        /* (c2-c10)[16] = (c1-c5)[8] */
+        tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887));
+        /* (c10-c14)[16] = (c5-c7)[8] */
+        tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579));
+
+        tmp20 = tmp10 + tmp0;
+        tmp27 = tmp10 - tmp0;
+        tmp21 = tmp12 + tmp1;
+        tmp26 = tmp12 - tmp1;
+        tmp22 = tmp13 + tmp2;
+        tmp25 = tmp13 - tmp2;
+        tmp23 = tmp11 + tmp3;
+        tmp24 = tmp11 - tmp3;
+
+        /* Odd part: uses input rows 1, 3, 5 and 7 */
+
+        z1 = ws[8*1];
+        z2 = ws[8*3];
+        z3 = ws[8*5];
+        z4 = ws[8*7];
+
+        tmp11 = z1 + z3;
+
+        tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+        tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+        tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+        tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+        tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+        tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+        tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+        tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+        z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+        tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+        tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+        z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+        tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+        tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+        z2    += z4;
+        z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+        tmp1  += z1;
+        tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+        z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+        tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+        tmp12 += z2;
+        z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+        tmp2  += z2;
+        tmp3  += z2;
+        z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+        tmp10 += z2;
+        tmp11 += z2;
+
+        /* Final output stage: overwrite rows 0..15 of this column */
+        ws[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+        ws[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+        ws[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+        ws[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+        ws[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+        ws[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+        ws[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+        ws[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+        ws[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+        ws[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+        ws[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+        ws[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+        ws[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+        ws[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+        ws[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+        ws[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+    }
+}
+
+/* horizontal-pass 16-point IDCT: per row, 8 ints from ws become 16 pixels */
+static void idct16h(int *ws, unsigned char *out, int rows, int rowstep)
+{
+    long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+    long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+    long z1, z2, z3, z4;
+    int row;
+    /* each iteration consumes 8 ints of ws and advances out by rowstep */
+    for (row = 0; row < rows; row++, out += rowstep, ws += 8)
+    {
+        /* Even part: uses inputs 0, 4, 2 and 6 of this row */
+
+        /* Add fudge factor here for final descale. */
+        tmp0 = (long) ws[0] + (ONE << (PASS1_BITS+2));
+        tmp0 <<= CONST_BITS;
+
+        z1 = (long) ws[4];
+        tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+        tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+        tmp10 = tmp0 + tmp1;
+        tmp11 = tmp0 - tmp1;
+        tmp12 = tmp0 + tmp2;
+        tmp13 = tmp0 - tmp2;
+
+        z1 = (long) ws[2];
+        z2 = (long) ws[6];
+        z3 = z1 - z2;
+        z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+        z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+        /* (c6+c2)[16] = (c3+c1)[8] */
+        tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);
+        /* (c6-c14)[16] = (c3-c7)[8] */
+        tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);
+        /* (c2-c10)[16] = (c1-c5)[8] */
+        tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887));
+        /* (c10-c14)[16] = (c5-c7)[8] */
+        tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579));
+
+        tmp20 = tmp10 + tmp0;
+        tmp27 = tmp10 - tmp0;
+        tmp21 = tmp12 + tmp1;
+        tmp26 = tmp12 - tmp1;
+        tmp22 = tmp13 + tmp2;
+        tmp25 = tmp13 - tmp2;
+        tmp23 = tmp11 + tmp3;
+        tmp24 = tmp11 - tmp3;
+
+        /* Odd part: uses inputs 1, 3, 5 and 7 of this row */
+
+        z1 = (long) ws[1];
+        z2 = (long) ws[3];
+        z3 = (long) ws[5];
+        z4 = (long) ws[7];
+
+        tmp11 = z1 + z3;
+
+        tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+        tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+        tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+        tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+        tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+        tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+        tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+        tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+        z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+        tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+        tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+        z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+        tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+        tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+        z2    += z4;
+        z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+        tmp1  += z1;
+        tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+        z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+        tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+        tmp12 += z2;
+        z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+        tmp2  += z2;
+        tmp3  += z2;
+        z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+        tmp10 += z2;
+        tmp11 += z2;
+
+        /* Final output stage: 16 pixels, JPEG_PIX_SZ apart, clamped by
+         * range_limit() after a right shift by DS_OUT */
+        out[JPEG_PIX_SZ*0]  = range_limit((int) RIGHT_SHIFT(tmp20 + tmp0,
+            DS_OUT));
+        out[JPEG_PIX_SZ*15] = range_limit((int) RIGHT_SHIFT(tmp20 - tmp0,
+            DS_OUT));
+        out[JPEG_PIX_SZ*1]  = range_limit((int) RIGHT_SHIFT(tmp21 + tmp1,
+            DS_OUT));
+        out[JPEG_PIX_SZ*14] = range_limit((int) RIGHT_SHIFT(tmp21 - tmp1,
+            DS_OUT));
+        out[JPEG_PIX_SZ*2]  = range_limit((int) RIGHT_SHIFT(tmp22 + tmp2,
+            DS_OUT));
+        out[JPEG_PIX_SZ*13] = range_limit((int) RIGHT_SHIFT(tmp22 - tmp2,
+            DS_OUT));
+        out[JPEG_PIX_SZ*3]  = range_limit((int) RIGHT_SHIFT(tmp23 + tmp3,
+            DS_OUT));
+        out[JPEG_PIX_SZ*12] = range_limit((int) RIGHT_SHIFT(tmp23 - tmp3,
+            DS_OUT));
+        out[JPEG_PIX_SZ*4]  = range_limit((int) RIGHT_SHIFT(tmp24 + tmp10,
+            DS_OUT));
+        out[JPEG_PIX_SZ*11] = range_limit((int) RIGHT_SHIFT(tmp24 - tmp10,
+            DS_OUT));
+        out[JPEG_PIX_SZ*5]  = range_limit((int) RIGHT_SHIFT(tmp25 + tmp11,
+            DS_OUT));
+        out[JPEG_PIX_SZ*10] = range_limit((int) RIGHT_SHIFT(tmp25 - tmp11,
+            DS_OUT));
+        out[JPEG_PIX_SZ*6]  = range_limit((int) RIGHT_SHIFT(tmp26 + tmp12,
+            DS_OUT));
+        out[JPEG_PIX_SZ*9]  = range_limit((int) RIGHT_SHIFT(tmp26 - tmp12,
+            DS_OUT));
+        out[JPEG_PIX_SZ*7]  = range_limit((int) RIGHT_SHIFT(tmp27 + tmp13,
+            DS_OUT));
+        out[JPEG_PIX_SZ*8]  = range_limit((int) RIGHT_SHIFT(tmp27 - tmp13,
+            DS_OUT));
+    }
+}
+
+struct idct_entry { /* one vertical/horizontal IDCT routine pair */
+    int v_scale; /* NOTE(review): presumably a scale for the vertical pass - confirm at use site */
+    int h_scale; /* NOTE(review): presumably a scale for the horizontal pass - confirm */
+    void (*v_idct)(int *ws, int cols); /* column pass over the workspace; NULL in idct_tbl[0] */
+    void (*h_idct)(int *ws, unsigned char *out, int rows, int rowstep); /* row pass writing to out */
+};
+
+struct idct_entry idct_tbl[] = { /* fields: v_scale, h_scale, v_idct, h_idct */
+    { PASS1_BITS, CONST_BITS, NULL, idct1h }, /* no vertical routine */
+    { PASS1_BITS, CONST_BITS, idct2v, idct2h },
+    { 0, 0, idct4v, idct4h },
+    { 0, 0, idct8v, idct8h },
+    { 0, 0, idct16v, idct16h }, /* the 16-point routines */
+};
+
+/* JPEG decoder implementation */
+
+INLINE void fill_buf(struct jpeg* p_jpeg)
+{   /* Rewind the read cursor and reload the buffer from the file. */
+    p_jpeg->buf_index = p_jpeg->buf;
+    p_jpeg->buf_left = read(p_jpeg->fd, p_jpeg->buf, JPEG_READ_BUF_SIZE);
+}
+
+static unsigned char *getc(struct jpeg* p_jpeg)
+{   /* Yield a pointer to the next input byte, or NULL if none is available. */
+    if (p_jpeg->buf_left <= 0) {
+        fill_buf(p_jpeg); /* buffer drained: attempt a single refill */
+        if (p_jpeg->buf_left <= 0) return NULL; /* refill produced no data */
+    }
+    p_jpeg->buf_left -= 1;
+    return p_jpeg->buf_index++;
+}
+
+INLINE bool skip_bytes_seek(struct jpeg* p_jpeg)
+{   /* Shift the file position by -buf_left, then mark the buffer empty. */
+    bool moved = lseek(p_jpeg->fd, -p_jpeg->buf_left, SEEK_CUR) >= 0;
+    if (moved)
+        p_jpeg->buf_left = 0; /* next getc() will refill */
+    return moved;
+}
+
+static bool skip_bytes(struct jpeg* p_jpeg, int count)
+{   /* Skip count input bytes; false on a negative count or a failed seek. */
+    if (count < 0)
+        return false; /* bad segment length: never move the cursor back */
+    p_jpeg->buf_index += count;
+    return (p_jpeg->buf_left -= count) >= 0 || skip_bytes_seek(p_jpeg);
+}
+
+#define e_skip_bytes(jpeg, count) /* skip_bytes() or return -1 from caller */ \
+do {\
+    if (!skip_bytes((jpeg),(count))) \
+        return -1; \
+} while (0)
+
+#define e_getc(jpeg, code) /* next byte; the caller returns (code) if none */ \
+({ \
+    unsigned char *e_getc_ptr_ = getc(jpeg); \
+    if (!e_getc_ptr_) \
+        return (code); \
+    *e_getc_ptr_; \
+})
+
+#define d_getc(jpeg, def) /* next input byte, or (def) when none is left */ \
+({ \
+    unsigned char *d_getc_ptr_ = getc(jpeg); \
+    unsigned char d_getc_val_ = d_getc_ptr_ ? *d_getc_ptr_ : (def); \
+    d_getc_val_; \
+})
+
+static void putc(struct jpeg* p_jpeg)
+{   /* Step the read cursor back by one byte so it is read again. */
+    --p_jpeg->buf_index;
+    ++p_jpeg->buf_left;
+}
+
+/* Preprocess the JPEG JFIF file: parse markers; seen-marker flags or <0 */
+static int process_markers(struct jpeg* p_jpeg)
+{
+    unsigned char c;
+    int marker_size; /* variable length of marker segment */
+    int i, j, n;
+    int ret = 0; /* returned flags */
+
+    while ((c = e_getc(p_jpeg, -1))) /* ends on a 0x00 byte; -1 if no input */
+    {
+        if (c != 0xFF) /* no marker? */
+        {
+            putc(p_jpeg);
+            break; /* exit marker processing */
+        }
+
+        c = e_getc(p_jpeg, -1); /* marker code following the 0xFF prefix */
+        switch (c)
+        {
+        case 0xFF: /* Fill byte */
+            ret |= FILL_FF; /* fall through */
+        case 0x00: /* Zero stuffed byte - entropy data */
+            putc(p_jpeg); /* this byte is read again by the loop condition */
+            continue;
+
+        case 0xC0: /* SOF Huff  - Baseline DCT */
+            {
+                ret |= SOF0;
+                marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                n = e_getc(p_jpeg, -1); /* sample precision (= 8 or 12) */
+                if (n != 8)
+                {
+                    return(-1); /* Unsupported sample precision */
+                }
+                p_jpeg->y_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                p_jpeg->y_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                p_jpeg->x_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                p_jpeg->x_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+
+                /* component count implied by the length: 3 bytes each */
+                n = (marker_size-2-6)/3;
+                if (e_getc(p_jpeg, -1) != n || (n != 1 && n != 3))
+                {
+                    return(-2); /* Unsupported SOF0 component specification */
+                }
+                for (i=0; i<n; i++)
+                {
+                    /* Component info */
+                    p_jpeg->frameheader[i].ID = e_getc(p_jpeg, -1);
+                    p_jpeg->frameheader[i].horizontal_sampling =
+                        (c = e_getc(p_jpeg, -1)) >> 4;
+                    p_jpeg->frameheader[i].vertical_sampling = c & 0x0F;
+                    p_jpeg->frameheader[i].quanttable_select =
+                        e_getc(p_jpeg, -1);
+                    if (p_jpeg->frameheader[i].horizontal_sampling > 2
+                     || p_jpeg->frameheader[i].vertical_sampling > 2)
+                    return -3; /* Unsupported SOF0 subsampling */
+                }
+                p_jpeg->blocks = n;
+            }
+            break;
+
+        case 0xC1: /* SOF Huff  - Extended sequential DCT*/
+        case 0xC2: /* SOF Huff  - Progressive DCT*/
+        case 0xC3: /* SOF Huff  - Spatial (sequential) lossless*/
+        case 0xC5: /* SOF Huff  - Differential sequential DCT*/
+        case 0xC6: /* SOF Huff  - Differential progressive DCT*/
+        case 0xC7: /* SOF Huff  - Differential spatial*/
+        case 0xC8: /* SOF Arith - Reserved for JPEG extensions*/
+        case 0xC9: /* SOF Arith - Extended sequential DCT*/
+        case 0xCA: /* SOF Arith - Progressive DCT*/
+        case 0xCB: /* SOF Arith - Spatial (sequential) lossless*/
+        case 0xCD: /* SOF Arith - Differential sequential DCT*/
+        case 0xCE: /* SOF Arith - Differential progressive DCT*/
+        case 0xCF: /* SOF Arith - Differential spatial*/
+            {
+                return (-4); /* other DCT model than baseline not implemented */
+            }
+
+        case 0xC4: /* Define Huffman Table(s) */
+            {
+                ret |= DHT;
+                marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                marker_size -= 2; /* the length field counts itself */
+
+                while (marker_size > 17) /* another table */
+                {
+                    c = e_getc(p_jpeg, -1); /* table class (high) and index */
+                    marker_size--;
+                    int sum = 0; /* total number of symbols in this table */
+                    i = c & 0x0F; /* table index */
+                    if (i > 1)
+                    {
+                        return (-5); /* Huffman table index out of range */
+                    } else {
+                        if (c & 0xF0) /* AC table */
+                        {
+                            for (j=0; j<16; j++) /* 16 per-length counts */
+                            {
+                                p_jpeg->hufftable[i].huffmancodes_ac[j] =
+                                    (c = e_getc(p_jpeg, -1));
+                                sum += c;
+                                marker_size -= 1;
+                            }
+                            if(16 + sum > AC_LEN)
+                                return -10; /* longer than allowed */
+
+                            for (; j < 16 + sum; j++) /* symbol values */
+                            {
+                                p_jpeg->hufftable[i].huffmancodes_ac[j] =
+                                    e_getc(p_jpeg, -1);
+                                marker_size--;
+                            }
+                        }
+                        else /* DC table */
+                        {
+                            for (j=0; j<16; j++) /* 16 per-length counts */
+                            {
+                                p_jpeg->hufftable[i].huffmancodes_dc[j] =
+                                    (c = e_getc(p_jpeg, -1));
+                                sum += c;
+                                marker_size--;
+                            }
+                            if(16 + sum > DC_LEN)
+                                return -11; /* longer than allowed */
+
+                            for (; j < 16 + sum; j++) /* symbol values */
+                            {
+                                p_jpeg->hufftable[i].huffmancodes_dc[j] =
+                                    e_getc(p_jpeg, -1);
+                                marker_size--;
+                            }
+                        }
+                    }
+                } /* while */
+                e_skip_bytes(p_jpeg, marker_size); /* rest of the segment */
+            }
+            break;
+
+        case 0xCC: /* Define Arithmetic coding conditioning(s) */
+            return(-6); /* Arithmetic coding not supported */
+
+        case 0xD8: /* Start of Image */
+        case 0xD9: /* End of Image */
+        case 0x01: /* for temp private use arith code */
+            break; /* skip parameterless marker */
+
+
+        case 0xDA: /* Start of Scan */
+            {
+                ret |= SOS;
+                marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                marker_size -= 2;
+
+                /* component count implied by the length: 2 bytes each */
+                n = (marker_size-1-3)/2;
+                if (e_getc(p_jpeg, -1) != n || (n != 1 && n != 3))
+                {
+                    return (-7); /* Unsupported SOS component specification */
+                }
+                marker_size--;
+                for (i=0; i<n; i++)
+                {
+                    p_jpeg->scanheader[i].ID = e_getc(p_jpeg, -1);
+                    p_jpeg->scanheader[i].DC_select = (c = e_getc(p_jpeg, -1))
+                        >> 4;
+                    p_jpeg->scanheader[i].AC_select = c & 0x0F;
+                    marker_size -= 2;
+                }
+                /* skip spectral information */
+                e_skip_bytes(p_jpeg, marker_size);
+            }
+            break;
+
+        case 0xDB: /* Define quantization Table(s) */
+            {
+                ret |= DQT;
+                marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                marker_size -= 2;
+
+                n = (marker_size)/(QUANT_TABLE_LENGTH+1); /* # of tables */
+                for (i=0; i<n; i++)
+                {
+                    int id = e_getc(p_jpeg, -1); /* ID */
+                    marker_size--;
+                    if (id >= 4)
+                    {
+                        return (-8); /* Unsupported quantization table */
+                    }
+                    /* Read Quantisation table: */
+                    for (j=0; j<QUANT_TABLE_LENGTH; j++)
+                    {
+                        p_jpeg->quanttable[id][j] = e_getc(p_jpeg, -1);
+                        marker_size--;
+                    }
+                }
+                e_skip_bytes(p_jpeg, marker_size); /* rest of the segment */
+            }
+            break;
+
+        case 0xDD: /* Define Restart Interval */
+            {
+                marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                marker_size -= 4; /* length field plus the 2-byte interval */
+                /* Highbyte */
+                p_jpeg->restart_interval = e_getc(p_jpeg, -1) << 8;
+                p_jpeg->restart_interval |= e_getc(p_jpeg, -1); /* Lowbyte */
+                e_skip_bytes(p_jpeg, marker_size); /* skip segment */
+            }
+            break;
+
+        case 0xDC: /* Define Number of Lines */
+        case 0xDE: /* Define Hierarchical progression */
+        case 0xDF: /* Expand Reference Component(s) */
+        case 0xE0: /* Application Field 0*/
+        case 0xE1: /* Application Field 1*/
+        case 0xE2: /* Application Field 2*/
+        case 0xE3: /* Application Field 3*/
+        case 0xE4: /* Application Field 4*/
+        case 0xE5: /* Application Field 5*/
+        case 0xE6: /* Application Field 6*/
+        case 0xE7: /* Application Field 7*/
+        case 0xE8: /* Application Field 8*/
+        case 0xE9: /* Application Field 9*/
+        case 0xEA: /* Application Field 10*/
+        case 0xEB: /* Application Field 11*/
+        case 0xEC: /* Application Field 12*/
+        case 0xED: /* Application Field 13*/
+        case 0xEE: /* Application Field 14*/
+        case 0xEF: /* Application Field 15*/
+        case 0xFE: /* Comment */
+            {
+                marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
+                marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
+                marker_size -= 2;
+                e_skip_bytes(p_jpeg, marker_size); /* skip segment */
+            }
+            break;
+
+        case 0xF0: /* Reserved for JPEG extensions */
+        case 0xF1: /* Reserved for JPEG extensions */
+        case 0xF2: /* Reserved for JPEG extensions */
+        case 0xF3: /* Reserved for JPEG extensions */
+        case 0xF4: /* Reserved for JPEG extensions */
+        case 0xF5: /* Reserved for JPEG extensions */
+        case 0xF6: /* Reserved for JPEG extensions */
+        case 0xF7: /* Reserved for JPEG extensions */
+        case 0xF8: /* Reserved for JPEG extensions */
+        case 0xF9: /* Reserved for JPEG extensions */
+        case 0xFA: /* Reserved for JPEG extensions */
+        case 0xFB: /* Reserved for JPEG extensions */
+        case 0xFC: /* Reserved for JPEG extensions */
+        case 0xFD: /* Reserved for JPEG extensions */
+        case 0x02: /* Reserved */
+        default:
+            return (-9); /* Unknown marker */
+        } /* switch */
+    } /* while */
+
+    return (ret); /* return flags with seen markers */
+}
+
+static const struct huffman_table luma_table = /* copied to hufftable[0] */
+{
+    { /* 16 per-length code counts, then 12 symbols; presumably the DC table */
+        0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
+    },
+    { /* 16 per-length code counts, then 162 symbols; presumably the AC table */
+        0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,
+        0x01,0x7D,0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,
+        0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,
+        0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,
+        0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,0x29,0x2A,0x34,0x35,0x36,0x37,
+        0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,
+        0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,
+        0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
+        0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,
+        0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,
+        0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,
+        0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,
+        0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
+    }
+};
+
+static const struct huffman_table chroma_table = /* copied to hufftable[1] */
+{
+    { /* 16 per-length code counts, then 12 symbols; presumably the DC table */
+        0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
+    },
+    { /* 16 per-length code counts, then 162 symbols; presumably the AC table */
+        0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,
+        0x02,0x77,0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,
+        0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,
+        0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,
+        0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,0x27,0x28,0x29,0x2A,0x35,0x36,
+        0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,
+        0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,
+        0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
+        0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,
+        0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,
+        0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,
+        0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,
+        0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
+    }
+};
+
+static void default_huff_tbl(struct jpeg* p_jpeg)
+{
+    /* Load the built-in Huffman tables into both table slots:
+     * slot 0 receives luma_table, slot 1 receives chroma_table.
+     */
+    MEMCPY(&p_jpeg->hufftable[1], &chroma_table, sizeof(chroma_table));
+    MEMCPY(&p_jpeg->hufftable[0], &luma_table, sizeof(luma_table));
+}
+
+/* Compute the derived values for a Huffman table.
+ * htbl: 16 per-length code counts followed by the symbol values.
+ * dtbl: receives the decode tables plus a back link to htbl. */
+static void fix_huff_tbl(int* htbl, struct derived_tbl* dtbl)
+{
+    int p, i, l, si, lookbits, ctr;
+    char huffsize[257];
+    unsigned int huffcode[257], code;
+
+    dtbl->pub = htbl; /* fill in back link */
+
+    /* Figure C.1: make table of Huffman code length for each symbol */
+    /* Note that this is in code-length order. */
+
+    p = 0;
+    for (l = 1; l <= 16; l++)
+    {    /* all possible code length */
+        for (i = 1; i <= (int) htbl[l-1]; i++)  /* all codes per length */
+            huffsize[p++] = (char) l;
+    }
+    huffsize[p] = 0; /* terminator for the loop below */
+
+    /* Figure C.2: generate the codes themselves */
+    /* Note that this is in code-length order. */
+
+    code = 0;
+    si = huffsize[0];
+    p = 0;
+    while (huffsize[p])
+    {
+        while (((int) huffsize[p]) == si)
+        {
+            huffcode[p++] = code;
+            code++;
+        }
+        code <<= 1; /* codes of the next length start one bit longer */
+        si++;
+    }
+
+    /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+    p = 0;
+    for (l = 1; l <= 16; l++)
+    {
+        if (htbl[l-1])
+        {
+            /* huffval[] index of 1st symbol of code length l */
+            dtbl->valptr[l] = p;
+            dtbl->mincode[l] = huffcode[p]; /* minimum code of length l */
+            p += htbl[l-1];
+            dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
+        }
+        else
+        {
+            dtbl->maxcode[l] = -1;  /* -1 if no codes of this length */
+        }
+    }
+    dtbl->maxcode[17] = 0xFFFFFL; /* ensures huff_DECODE terminates */
+
+    /* Compute lookahead tables to speed up decoding.
+    * First we set all the table entries to 0, indicating "too long";
+    * then we iterate through the Huffman codes that are short enough and
+    * fill in all the entries that correspond to bit sequences starting
+    * with that code.
+    */
+
+    MEMSET(dtbl->look_nbits, 0, sizeof(dtbl->look_nbits));
+
+    p = 0;
+    for (l = 1; l <= HUFF_LOOKAHEAD; l++)
+    {
+        for (i = 1; i <= (int) htbl[l-1]; i++, p++)
+        {
+            /* l = code length, p = index in huffcode[] & huffval[]. Generate
+             * left-justified code followed by all possible bit sequences */
+            lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
+            /* the bound stops a corrupt table from writing past the LUTs */
+            for (ctr = 1 << (HUFF_LOOKAHEAD-l);
+                 ctr > 0 && lookbits < (1 << HUFF_LOOKAHEAD); ctr--)
+            {
+                dtbl->look_nbits[lookbits] = l;
+                dtbl->look_sym[lookbits] = htbl[16+p];
+                lookbits++;
+            }
+        }
+    }
+}
+
+
+/* zag[i] is the natural-order position of the i'th element of zigzag order.
+ * If the incoming data is corrupted, decode_mcu could attempt to
+ * reference values beyond the end of the array.  To avoid a wild store,
+ * we put 16 extra zeroes after the 64 real entries.
+ */
+static const unsigned char zag[] =
+{
+     0,  1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63,
+     0,  0,  0,  0,  0,  0,  0,  0, /* extra entries in case k>63 below */
+     0,  0,  0,  0,  0,  0,  0,  0
+};
+
+/* zig[i] is the zig-zag order position of the i'th element of natural
+ * order, reading left-to-right then top-to-bottom (the inverse of zag[]).
+ */
+static const unsigned char zig[] =
+{
+     0,  1,  5,  6, 14, 15, 27, 28,
+     2,  4,  7, 13, 16, 26, 29, 42,
+     3,  8, 12, 17, 25, 30, 41, 43,
+     9, 11, 18, 24, 31, 40, 44, 53,
+    10, 19, 23, 32, 39, 45, 52, 54,
+    20, 22, 33, 38, 46, 51, 55, 60,
+    21, 34, 37, 47, 50, 56, 59, 61,
+    35, 36, 48, 49, 57, 58, 62, 63
+};
+
+/* Set up MCU geometry and block membership from component 0's sampling. */
+INLINE void fix_headers(struct jpeg* p_jpeg)
+{
+    int i;
+
+    for (i=0; i<4; i++)
+        p_jpeg->store_pos[i] = i; /* default ordering */
+
+    /* assignments for the decoding of blocks */
+    if (p_jpeg->frameheader[0].horizontal_sampling == 2
+        && p_jpeg->frameheader[0].vertical_sampling == 1)
+    {   /* 4:2:2 */
+        p_jpeg->blocks = 4;
+        p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
+        p_jpeg->x_phys = p_jpeg->x_mbl * 16;
+        p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
+        p_jpeg->y_phys = p_jpeg->y_mbl * 8;
+        p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
+        p_jpeg->mcu_membership[1] = 0;
+        p_jpeg->mcu_membership[2] = 1;
+        p_jpeg->mcu_membership[3] = 2;
+        p_jpeg->tab_membership[0] = 0; /* table index: 0 for Y, 1 for U/V */
+        p_jpeg->tab_membership[1] = 0;
+        p_jpeg->tab_membership[2] = 1;
+        p_jpeg->tab_membership[3] = 1;
+        p_jpeg->subsample_x[0] = 1;
+        p_jpeg->subsample_x[1] = 2;
+        p_jpeg->subsample_x[2] = 2;
+        p_jpeg->subsample_y[0] = 1;
+        p_jpeg->subsample_y[1] = 1;
+        p_jpeg->subsample_y[2] = 1;
+    }
+    else if (p_jpeg->frameheader[0].horizontal_sampling == 1
+        && p_jpeg->frameheader[0].vertical_sampling == 2)
+    {   /* 4:2:2 vertically subsampled */
+        p_jpeg->store_pos[1] = 2; /* block positions are mirrored */
+        p_jpeg->store_pos[2] = 1;
+        p_jpeg->blocks = 4;
+        p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
+        p_jpeg->x_phys = p_jpeg->x_mbl * 8;
+        p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
+        p_jpeg->y_phys = p_jpeg->y_mbl * 16;
+        p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
+        p_jpeg->mcu_membership[1] = 0;
+        p_jpeg->mcu_membership[2] = 1;
+        p_jpeg->mcu_membership[3] = 2;
+        p_jpeg->tab_membership[0] = 0; /* table index: 0 for Y, 1 for U/V */
+        p_jpeg->tab_membership[1] = 0;
+        p_jpeg->tab_membership[2] = 1;
+        p_jpeg->tab_membership[3] = 1;
+        p_jpeg->subsample_x[0] = 1;
+        p_jpeg->subsample_x[1] = 1;
+        p_jpeg->subsample_x[2] = 1;
+        p_jpeg->subsample_y[0] = 1;
+        p_jpeg->subsample_y[1] = 2;
+        p_jpeg->subsample_y[2] = 2;
+    }
+    else if (p_jpeg->frameheader[0].horizontal_sampling == 2
+        && p_jpeg->frameheader[0].vertical_sampling == 2)
+    {   /* 4:2:0 */
+        p_jpeg->blocks = 6;
+        p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
+        p_jpeg->x_phys = p_jpeg->x_mbl * 16;
+        p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
+        p_jpeg->y_phys = p_jpeg->y_mbl * 16;
+        p_jpeg->mcu_membership[0] = 0;
+        p_jpeg->mcu_membership[1] = 0;
+        p_jpeg->mcu_membership[2] = 0;
+        p_jpeg->mcu_membership[3] = 0;
+        p_jpeg->mcu_membership[4] = 1;
+        p_jpeg->mcu_membership[5] = 2;
+        p_jpeg->tab_membership[0] = 0;
+        p_jpeg->tab_membership[1] = 0;
+        p_jpeg->tab_membership[2] = 0;
+        p_jpeg->tab_membership[3] = 0;
+        p_jpeg->tab_membership[4] = 1;
+        p_jpeg->tab_membership[5] = 1;
+        p_jpeg->subsample_x[0] = 1;
+        p_jpeg->subsample_x[1] = 2;
+        p_jpeg->subsample_x[2] = 2;
+        p_jpeg->subsample_y[0] = 1;
+        p_jpeg->subsample_y[1] = 2;
+        p_jpeg->subsample_y[2] = 2;
+    }
+    else if (p_jpeg->frameheader[0].horizontal_sampling == 1
+        && p_jpeg->frameheader[0].vertical_sampling == 1)
+    {   /* 4:4:4 */
+        /* don't overwrite p_jpeg->blocks */
+        p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
+        p_jpeg->x_phys = p_jpeg->x_mbl * 8;
+        p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
+        p_jpeg->y_phys = p_jpeg->y_mbl * 8;
+        p_jpeg->mcu_membership[0] = 0;
+        p_jpeg->mcu_membership[1] = 1;
+        p_jpeg->mcu_membership[2] = 2;
+        p_jpeg->tab_membership[0] = 0;
+        p_jpeg->tab_membership[1] = 1;
+        p_jpeg->tab_membership[2] = 1;
+        p_jpeg->subsample_x[0] = 1;
+        p_jpeg->subsample_x[1] = 1;
+        p_jpeg->subsample_x[2] = 1;
+        p_jpeg->subsample_y[0] = 1;
+        p_jpeg->subsample_y[1] = 1;
+        p_jpeg->subsample_y[2] = 1;
+    }
+    else
+    {
+        /* unsupported sampling factors: nothing is configured here */
+    }
+
+}
+
+INLINE void fix_huff_tables(struct jpeg *p_jpeg)
+{   /* build the derived DC and AC lookup tables for both table slots */
+    int t;
+    for (t = 0; t < 2; t++)
+    {
+        fix_huff_tbl(p_jpeg->hufftable[t].huffmancodes_dc,
+            &p_jpeg->dc_derived_tbls[t]);
+        fix_huff_tbl(p_jpeg->hufftable[t].huffmancodes_ac,
+            &p_jpeg->ac_derived_tbls[t]);
+    }
+}
+
+/* Some IDCT routines never multiply by a constant and so produce unshifted
+ * output; for those the shift is folded into the quantization table instead
+ * of being applied per value. Only the coefficients the scaled IDCT reads
+ * (rows < 2^v_scale, columns < 2^h_scale, both capped at 8) are pre-shifted.
+ */
+INLINE void fix_quant_tables(struct jpeg *p_jpeg)
+{
+    int shift, i, x, y, a;
+    for (i = 0; i < 2; i++) /* 0 and 1 index the two quanttable entries */
+    {
+        shift = idct_tbl[p_jpeg->v_scale[i]].v_scale +
+            idct_tbl[p_jpeg->h_scale[i]].h_scale;
+        if (shift)
+        {
+            a = 0; /* natural-order index of column 0 in row y */
+            for (y = 0; y < 1 << MIN(p_jpeg->v_scale[i], 3); y++)
+            {
+                for (x = 0; x < 1 << MIN(p_jpeg->h_scale[i], 3); x++)
+                    p_jpeg->quanttable[i][zig[a+x]] <<= shift;
+                a += 8;
+            }
+        }
+    }
+}
+
+/*
+* These functions provide the in-line portion of bit fetching.
+* Use check_bit_buffer to ensure there are N bits in p_jpeg->bitbuf
+* before using get_bits, peek_bits, or drop_bits.
+*  check_bit_buffer(p_jpeg, n);
+*    Ensure there are N bits in bitbuf, refilling it when there are fewer.
+*  val = get_bits(p_jpeg, n);
+*    Fetch next N bits.
+*  val = peek_bits(p_jpeg, n);
+*    Fetch next N bits without removing them from the buffer.
+*  drop_bits(p_jpeg, n);
+*    Discard next N bits.
+* Unlike the IJG macros these are modelled on, the versions below are
+* functions, so N is evaluated only once per call.
+*/
+
+static void fill_bit_buffer(struct jpeg* p_jpeg)
+{   /* append two more input bytes (16 bits) at the low end of bitbuf */
+    unsigned char byte, marker;
+
+    if (p_jpeg->marker_val) /* RSTm seen earlier: 16 more bits follow it */
+        p_jpeg->marker_ind += 16;
+    byte = d_getc(p_jpeg, 0);
+    if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
+    {   /* simplification: just skip the (one-byte) marker code */
+        marker = d_getc(p_jpeg, 0);
+        if ((marker & ~7) == 0xD0) /* 0xD0..0xD7 */
+        {
+            p_jpeg->marker_val = marker;
+            p_jpeg->marker_ind = 8; /* the second byte below follows it */
+        }
+    }
+    p_jpeg->bitbuf = (p_jpeg->bitbuf << 8) | byte;
+
+    byte = d_getc(p_jpeg, 0);
+    if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
+    {   /* simplification: just skip the (one-byte) marker code */
+        marker = d_getc(p_jpeg, 0);
+        if ((marker & ~7) == 0xD0) /* 0xD0..0xD7 */
+        {
+            p_jpeg->marker_val = marker;
+            p_jpeg->marker_ind = 0; /* nothing buffered after it yet */
+        }
+    }
+    p_jpeg->bitbuf = (p_jpeg->bitbuf << 8) | byte;
+    p_jpeg->bitbuf_bits += 16;
+#ifdef JPEG_BS_DEBUG
+    DEBUGF("read in: %X\n", p_jpeg->bitbuf & 0xFFFF);
+#endif
+}
+
+INLINE void check_bit_buffer(struct jpeg *p_jpeg, int nbits)
+{   /* one 16-bit refill whenever fewer than nbits bits are buffered */
+    if (p_jpeg->bitbuf_bits < nbits)
+        fill_bit_buffer(p_jpeg);
+}
+
+INLINE int get_bits(struct jpeg *p_jpeg, int nbits)
+{   /* remove the next nbits bits from bitbuf and return them as an int */
+#ifdef JPEG_BS_DEBUG
+    if (nbits > p_jpeg->bitbuf_bits)
+        DEBUGF("bitbuffer underrun\n");
+    int mask = 1 << (p_jpeg->bitbuf_bits - 1); /* topmost unread bit */
+    int i;
+    DEBUGF("get %d bits: ", nbits);
+    for (i = 0; i < nbits; i++, mask >>= 1) /* shift only after testing */
+        DEBUGF("%d",!!(p_jpeg->bitbuf & mask));
+    DEBUGF("\n");
+#endif
+    return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits -= nbits))) &
+        ((1<<nbits)-1);
+}
+
+INLINE int peek_bits(struct jpeg *p_jpeg, int nbits)
+{   /* return the next nbits bits of bitbuf without consuming them */
+#ifdef JPEG_BS_DEBUG
+    int mask = 1 << (p_jpeg->bitbuf_bits - 1); /* topmost unread bit */
+    int i;
+    DEBUGF("peek %d bits: ", nbits);
+    for (i = 0; i < nbits; i++, mask >>= 1) /* shift only after testing */
+        DEBUGF("%d",!!(p_jpeg->bitbuf & mask));
+    DEBUGF("\n");
+#endif
+    return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits - nbits))) &
+        ((1<<nbits)-1);
+}
+
+INLINE void drop_bits(struct jpeg *p_jpeg, int nbits)
+{   /* discard the next nbits bits by lowering the valid-bit count */
+#ifdef JPEG_BS_DEBUG
+    int mask = 1 << (p_jpeg->bitbuf_bits - 1); /* topmost unread bit */
+    int i;
+    DEBUGF("drop %d bits: ", nbits);
+    for (i = 0; i < nbits; i++, mask >>= 1) /* shift only after testing */
+        DEBUGF("%d",!!(p_jpeg->bitbuf & mask));
+    DEBUGF("\n");
+#endif
+    p_jpeg->bitbuf_bits -= nbits;
+}
+
+/* re-synchronize to entropy data (skip restart marker) */
+static void search_restart(struct jpeg *p_jpeg)
+{
+    if (p_jpeg->marker_val) /* fill_bit_buffer already passed an RSTm */
+    {
+        p_jpeg->marker_val = 0;
+        p_jpeg->bitbuf_bits = p_jpeg->marker_ind; /* keep bits after it */
+        p_jpeg->marker_ind = 0;
+        return;
+    }
+    unsigned char byte;
+    p_jpeg->bitbuf_bits = 0; /* discard whatever is still buffered */
+    while ((byte = d_getc(p_jpeg, 0xFF))) /* scan; also stops on a 0 byte */
+    {
+        if (byte == 0xff)
+        {
+            byte = d_getc(p_jpeg, 0xD0);
+            if ((byte & ~7) == 0xD0) /* 0xD0..0xD7: restart marker found */
+            {
+                return;
+            }
+            else
+                putc(p_jpeg); /* NOTE(review): presumably un-reads the byte */
+        }
+    }
+}
+
+/* Figure F.12: extend sign bit of the s-bit value x (s must be 0..15). */
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+{
+    0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
+};
+
+static const int extend_offset[16] = /* entry n is 1 - 2**n (no shift of -1) */
+{
+    0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4),
+    1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8),
+    1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12),
+    1 - (1<<13), 1 - (1<<14), 1 - (1<<15)
+};
+
+/* Decode one DC difference: a Huffman size symbol, then that many raw bits */
+INLINE int huff_decode_dc(struct jpeg *p_jpeg, struct derived_tbl* tbl)
+{
+    int nb, look, s, r;
+
+    check_bit_buffer(p_jpeg, HUFF_LOOKAHEAD);
+    look = peek_bits(p_jpeg, HUFF_LOOKAHEAD);
+    if ((nb = tbl->look_nbits[look]) != 0)
+    {
+        drop_bits(p_jpeg, nb);
+        s = tbl->look_sym[look] & 15; /* corrupt table: stay in extend_*[] */
+        check_bit_buffer(p_jpeg, s);
+        r = get_bits(p_jpeg, s);
+        s = HUFF_EXTEND(r, s);
+    }
+    else
+    {   /* slow path: the code is longer than HUFF_LOOKAHEAD bits */
+        long code;
+        nb=HUFF_LOOKAHEAD+1;
+        check_bit_buffer(p_jpeg, nb);
+        code = get_bits(p_jpeg, nb);
+        while (code > tbl->maxcode[nb]) /* append one bit at a time */
+        {
+            code <<= 1;
+            check_bit_buffer(p_jpeg, 1);
+            code |= get_bits(p_jpeg, 1);
+            nb++;
+        }
+        if (nb > 16) /* error in Huffman */
+        {
+            s=0; /* fake a zero, this is most safe */
+        }
+        else
+        {
+            s = tbl->pub[16 + tbl->valptr[nb] +
+                ((int) (code - tbl->mincode[nb]))] & 15; /* same bound */
+            check_bit_buffer(p_jpeg, s);
+            r = get_bits(p_jpeg, s);
+            s = HUFF_EXTEND(r, s);
+        }
+    } /* end slow decode */
+    return s;
+}
+
+INLINE int huff_decode_ac(struct jpeg *p_jpeg, struct derived_tbl* tbl)
+{
+    int idx, len;
+    long bits;
+
+    /* Fast path: a code of at most HUFF_LOOKAHEAD bits is resolved with a
+     * single table lookup on the next HUFF_LOOKAHEAD bits of input. */
+    check_bit_buffer(p_jpeg, HUFF_LOOKAHEAD);
+    idx = peek_bits(p_jpeg, HUFF_LOOKAHEAD);
+    len = tbl->look_nbits[idx];
+    if (len != 0)
+    {
+        drop_bits(p_jpeg, len);
+        return tbl->look_sym[idx];
+    }
+
+    /* Slow path: start with HUFF_LOOKAHEAD+1 bits and append one bit at a
+     * time until the code is no larger than maxcode for its length. */
+    len = HUFF_LOOKAHEAD + 1;
+    check_bit_buffer(p_jpeg, len);
+    bits = get_bits(p_jpeg, len);
+    while (bits > tbl->maxcode[len])
+    {
+        check_bit_buffer(p_jpeg, 1);
+        bits = (bits << 1) | get_bits(p_jpeg, 1);
+        len++;
+    }
+
+    /* error in Huffman: fake a zero, this is most safe */
+    if (len > 16)
+        return 0;
+
+    /* the raw symbol is returned; the caller splits it into run and size */
+    return tbl->pub[16 + tbl->valptr[len] +
+        ((int) (bits - tbl->mincode[len]))];
+}
+
+struct img_part *store_row_jpeg(void *jpeg_args)
+{   /* return the next decoded pixel row; jpeg_args is the struct jpeg */
+    struct jpeg *p_jpeg = (struct jpeg*) jpeg_args;
+    unsigned int width = p_jpeg->x_mbl << p_jpeg->h_scale[1]; /* pixels/row */
+    unsigned int b_width = width * JPEG_PIX_SZ; /* bytes per row */
+    int height = 1U << p_jpeg->v_scale[1]; /* rows per decoded MCU row */
+    int x;
+    if (!p_jpeg->mcu_row) /* Need to decode a new row of MCUs */
+    {
+        p_jpeg->out_ptr = (unsigned char *)p_jpeg->img_buf;
+        int store_offs[4]; /* byte offset of each block within the MCU */
+        int mcu_offset = JPEG_PIX_SZ << p_jpeg->h_scale[1];
+        unsigned char *out = p_jpeg->out_ptr;
+        store_offs[p_jpeg->store_pos[0]] = 0;
+        store_offs[p_jpeg->store_pos[1]] = JPEG_PIX_SZ << p_jpeg->h_scale[0];
+        store_offs[p_jpeg->store_pos[2]] = b_width << p_jpeg->v_scale[0];
+        store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2];
+
+        int block[128]; /* decoded DCT coefficients */
+        for (x = 0; x < p_jpeg->x_mbl; x++) /* one MCU per iteration */
+        {
+            int blkn;
+            for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
+            {
+                int k = 1; /* coefficient index */
+                int s, r; /* huffman values */
+                int ci = p_jpeg->mcu_membership[blkn]; /* component index */
+                int ti = p_jpeg->tab_membership[blkn]; /* table index */
+                struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
+                struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];
+
+                /* Section F.2.2.1: decode the DC coefficient difference */
+                s = huff_decode_dc(p_jpeg, dctbl);
+
+#ifndef HAVE_LCD_COLOR
+                if (!ci) /* without color, only component 0 is decoded */
+#endif
+                {
+#ifdef HAVE_LCD_COLOR
+                    p_jpeg->last_dc_val[ci] += s;
+                    /* output it (assumes zag[0] = 0) */
+                    block[0] = p_jpeg->last_dc_val[ci] *
+                        p_jpeg->quanttable[!!ci][0];
+#else
+                    p_jpeg->last_dc_val += s;
+                    /* output it (assumes zag[0] = 0) */
+                    block[0] = p_jpeg->last_dc_val *
+                        p_jpeg->quanttable[!!ci][0];
+#endif
+                    /* coefficient buffer must be cleared */
+                    MEMSET(block+1, 0, p_jpeg->zero_need[!!ci] * sizeof(int));
+                    /* Section F.2.2.2: decode the AC coefficients */
+                    for (; k < p_jpeg->k_need[!!ci]; k++)
+                    {
+                        s = huff_decode_ac(p_jpeg, actbl);
+                        r = s >> 4; /* high nibble: run of zero coefficients */
+                        s &= 15; /* low nibble: bit count of the value */
+                        if (s)
+                        {
+                            k += r;
+                            check_bit_buffer(p_jpeg, s);
+                            r = get_bits(p_jpeg, s);
+                            r = HUFF_EXTEND(r, s);
+                            int a = zag[k];
+                            if (a <= zag[p_jpeg->k_need[!!ci]] && (a & 7) <=
+                                (zag[p_jpeg->k_need[!!ci]] & 7))
+                            {   /* store only positions the scaled IDCT reads */
+                                r *= p_jpeg->quanttable[!!ci][k];
+                                block[zag[k]] = r ;
+                            }
+                        }
+                        else
+                        {
+                            if (r != 15) /* not a 16-zero run: block is done */
+                            {
+                                k = 64;
+                                break;
+                            }
+                            k += r;
+                        }
+                    }  /* for k */
+                }
+                for (; k < 64; k++) /* consume unused symbols, store nothing */
+                {
+                    s = huff_decode_ac(p_jpeg, actbl);
+                    r = s >> 4;
+                    s &= 15;
+
+                    if (s)
+                    {
+                        k += r;
+                        check_bit_buffer(p_jpeg, s);
+                        drop_bits(p_jpeg, s);
+                    }
+                    else
+                    {
+                        if (r != 15)
+                            break;
+                        k += r;
+                    }
+                }  /* for k */
+#ifndef HAVE_LCD_COLOR
+                if (!ci) /* without color, only component 0 is output */
+#endif
+                {
+                    unsigned char si = !!ci; /* 0 = component 0, 1 = others */
+                    int idct_cols = 1 << MIN(p_jpeg->h_scale[si], 3);
+                    int idct_rows = 1 << p_jpeg->v_scale[si];
+                    unsigned char *b_out = out + (ci ? ci : store_offs[blkn]);
+                    if (idct_tbl[p_jpeg->v_scale[si]].v_idct)
+                        idct_tbl[p_jpeg->v_scale[si]].v_idct(block, idct_cols);
+                    idct_tbl[p_jpeg->h_scale[si]].h_idct(block, b_out,
+                        idct_rows, b_width);
+                }
+            } /* for blkn */
+            /* don't starve other threads while an MCU row decodes */
+            yield();
+#ifdef HAVE_LCD_COLOR
+            unsigned int xp;
+            int yp;
+            unsigned char *row = out;
+            if (p_jpeg->blocks > 1) { /* convert this MCU's pixels in place */
+                for (yp = 0; yp < height; yp++, row += b_width)
+                {
+                    unsigned char *px = row;
+                    for (xp = 0; xp < 1U << p_jpeg->h_scale[1];
+                        xp++, px += JPEG_PIX_SZ)
+                    {
+                        int y, u, v, rv, guv, bu;
+                        y = px[0] * YFAC + (YFAC >> 1);
+                        u = px[1] - 128;
+                        v = px[2] - 128;
+                        rv = RVFAC * v;
+                        guv = GUFAC * u + GVFAC * v;
+                        bu = BUFAC * u;
+                        struct uint8_rgb *rgb = (struct uint8_rgb *)px;
+                        rgb->red = clamp_component((y + rv) / YFAC);
+                        rgb->green = clamp_component((y + guv) / YFAC);
+                        rgb->blue = clamp_component((y + bu) / YFAC);
+                    }
+                }
+            } else { /* single block per MCU: copy byte 0 into bytes 1, 2 */
+                for (yp = 0; yp < height; yp++, row += b_width)
+                {
+                    unsigned char *px = row;
+                    for (xp = 0; xp < 1U << p_jpeg->h_scale[1];
+                        xp++, px += JPEG_PIX_SZ)
+                    {
+                        px[1] = px[2] = px[0];
+                    }
+                }
+            }
+#endif
+            out += mcu_offset; /* advance to the next MCU in the row buffer */
+            if (p_jpeg->restart_interval && --p_jpeg->restart == 0)
+            {   /* if a restart marker is due: */
+                p_jpeg->restart = p_jpeg->restart_interval; /* count again */
+                search_restart(p_jpeg); /* align the bitstream */
+#ifdef HAVE_LCD_COLOR
+                p_jpeg->last_dc_val[0] = p_jpeg->last_dc_val[1] =
+                                 p_jpeg->last_dc_val[2] = 0; /* reset decoder */
+#else
+                p_jpeg->last_dc_val = 0;
+#endif
+            }
+        }
+    } /* if !p_jpeg->mcu_row */
+    p_jpeg->mcu_row = (p_jpeg->mcu_row + 1) & (height - 1); /* wraps to 0 */
+    p_jpeg->part.len = width;
+    p_jpeg->part.buf = (jpeg_pix_t *)p_jpeg->out_ptr;
+    p_jpeg->out_ptr += b_width; /* next call hands out the following row */
+    return &(p_jpeg->part);
+}
+
+/******************************************************************************
+ * read_jpeg_file()
+ *
+ * Opens filename and decodes it via read_jpeg_fd(); returns that function's
+ * result, or fd * 10 - 1 (negative) when the file cannot be opened.
+ *****************************************************************************/
+int read_jpeg_file(const char* filename,
+                   struct bitmap *bm,
+                   int maxsize,
+                   int format,
+                   const struct custom_format *cformat)
+{
+    int ret;
+    int fd = open(filename, O_RDONLY);
+
+    if (fd >= 0) {
+        /* decode from the descriptor, then release it */
+        ret = read_jpeg_fd(fd, bm, maxsize, format, cformat);
+        close(fd);
+    } else {
+        DEBUGF("read_jpeg_file: can't open '%s', rc: %d\n", filename, fd);
+        ret = fd * 10 - 1;
+    }
+    return ret;
+}
+
+static int calc_scale(int in_size, int out_size, int subsample)
+{
+    const int limit = 5 - subsample; /* largest exponent that may be returned */
+    const int target = out_size << 3; /* eight times the requested size */
+    int exp = 0;
+    /* double the input size until it reaches the target or the limit */
+    while (exp < limit && target > in_size)
+    {
+        in_size <<= 1;
+        exp++;
+    }
+    return exp;
+}
+
+int read_jpeg_fd(int fd,
+                 struct bitmap *bm,
+                 int maxsize,
+                 int format,
+                 const struct custom_format *cformat)
+{   /* decode fd into bm; bm->data (maxsize bytes) is the only work memory */
+    bool resize = false, dither = false;
+    struct rowset rset;
+    struct dim src_dim;
+    struct jpeg *p_jpeg = (struct jpeg*)bm->data; /* temporary location */
+    int tmp_size = maxsize;
+    int status;
+    int bm_size;
+    ALIGN_BUFFER(p_jpeg, tmp_size, sizeof(int));
+    /* not enough memory for our struct jpeg */
+    if ((size_t)tmp_size < sizeof(struct jpeg))
+        return -1;
+
+    memset(p_jpeg, 0, sizeof(struct jpeg));
+    p_jpeg->fd = fd;
+    status = process_markers(p_jpeg);
+    if (status < 0)
+        return status;
+    if ((status & (DQT | SOF0)) != (DQT | SOF0)) /* both flags are required */
+        return -(status * 16);
+    if (!(status & DHT)) /* if no Huffman table present: */
+        default_huff_tbl(p_jpeg); /* use default */
+    fix_headers(p_jpeg); /* derive MCU layout and membership tables */
+    src_dim.width = p_jpeg->x_size;
+    src_dim.height = p_jpeg->y_size;
+    if (format & FORMAT_RESIZE)
+        resize = true;
+    if (format & FORMAT_DITHER)
+        dither = true;
+    if (resize) { /* caller supplies the wanted size in bm->width/height */
+        struct dim resize_dim = {
+            .width = bm->width,
+            .height = bm->height,
+        };
+        if (format & FORMAT_KEEP_ASPECT)
+            recalc_dimension(&resize_dim, &src_dim);
+        bm->width = resize_dim.width;
+        bm->height = resize_dim.height;
+        if (bm->width == src_dim.width && bm->height == src_dim.height)
+            resize = false;
+    } else {
+        bm->width = p_jpeg->x_size;
+        bm->height = p_jpeg->y_size;
+    }
+    p_jpeg->h_scale[0] = calc_scale(p_jpeg->x_size, bm->width,
+        p_jpeg->frameheader[0].horizontal_sampling);
+    p_jpeg->v_scale[0] = calc_scale(p_jpeg->y_size, bm->height,
+        p_jpeg->frameheader[0].vertical_sampling);
+    p_jpeg->h_scale[1] = p_jpeg->h_scale[0] +
+        p_jpeg->frameheader[0].horizontal_sampling - 1;
+    p_jpeg->v_scale[1] = p_jpeg->v_scale[0] +
+        p_jpeg->frameheader[0].vertical_sampling - 1;
+    fix_quant_tables(p_jpeg);
+    int decode_w = (1 << MIN(p_jpeg->h_scale[0],3)) - 1; /* last column used */
+    int decode_h = (1 << MIN(p_jpeg->v_scale[0],3)) - 1; /* last row used */
+    src_dim.width = (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3;
+    src_dim.height = (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3;
+    p_jpeg->zero_need[0] = (decode_h << 3) + decode_w;
+    p_jpeg->k_need[0] = zig[p_jpeg->zero_need[0]];
+    decode_w = (1 << MIN(p_jpeg->h_scale[1],3)) - 1;
+    decode_h = (1 << MIN(p_jpeg->v_scale[1],3)) - 1;
+    p_jpeg->zero_need[1] = p_jpeg->zero_need[2] = (decode_h << 3) + decode_w;
+    p_jpeg->k_need[1] = p_jpeg->k_need[2] = zig[p_jpeg->zero_need[1]];
+    if (cformat) /* a custom format reports its own output size */
+        bm_size = cformat->get_size(bm);
+    else
+        bm_size = BM_SIZE(bm->width,bm->height,FORMAT_NATIVE,false);
+    if (bm_size > maxsize)
+        return -1;
+    char *buf_start = (char *)bm->data + bm_size; /* past the output bitmap */
+    char *buf_end = (char *)bm->data + maxsize;
+    maxsize = buf_end - buf_start;
+    ALIGN_BUFFER(buf_start, maxsize, sizeof(uint32_t));
+    if (maxsize < (int)sizeof(struct jpeg))
+        return -1;
+    memmove(buf_start, p_jpeg, sizeof(struct jpeg)); /* regions may overlap */
+    p_jpeg = (struct jpeg *)buf_start;
+    fix_huff_tables(p_jpeg);
+    buf_start += sizeof(struct jpeg);
+    maxsize = buf_end - buf_start;
+    int decode_buf_size = (p_jpeg->x_mbl << p_jpeg->h_scale[1])
+        << p_jpeg->v_scale[1]; /* pixels in one decoded row of MCUs */
+    decode_buf_size *= JPEG_PIX_SZ;
+    p_jpeg->img_buf = (jpeg_pix_t *)buf_start;
+    if (buf_end - buf_start < decode_buf_size)
+        return -1;
+    buf_start += decode_buf_size;
+    maxsize = buf_end - buf_start; /* what is left goes to resize_on_load */
+    memset(p_jpeg->img_buf, 0, decode_buf_size);
+    p_jpeg->mcu_row = 0;
+    p_jpeg->restart = p_jpeg->restart_interval;
+    rset.rowstart = 0;
+    rset.rowstop = bm->height;
+    rset.rowstep = 1;
+    if (resize_on_load(bm, dither, &src_dim, &rset, buf_start, maxsize, cformat,
+        store_row_jpeg, p_jpeg)) /* rows are pulled through store_row_jpeg */
+        return bm_size;
+    else
+        return 0;
+}
+
+/**************** end JPEG code ********************/
diff --git a/apps/recorder/jpeg_load.h b/apps/recorder/jpeg_load.h
new file mode 100644
index 0000000000..73b6c51bf3
--- /dev/null
+++ b/apps/recorder/jpeg_load.h
@@ -0,0 +1,47 @@
+/***************************************************************************
+*             __________               __   ___.
+*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+*                     \/            \/     \/    \/            \/
+* $Id$
+*
+* Core JPEG reader interface
+* (decoder adapted from the JPEG image viewer plugin)
+*
+* File scrolling addition (C) 2005 Alexander Spyridakis
+* Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
+* Heavily borrowed from the IJG implementation (C) Thomas G. Lane
+* Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version 2
+* of the License, or (at your option) any later version.
+*
+* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+* KIND, either express or implied.
+*
+****************************************************************************/
+
+#include "resize.h"
+#include "bmp.h"
+#include "jpeg_common.h"
+
+#ifndef _JPEG_LOAD_H
+#define _JPEG_LOAD_H
+
+int read_jpeg_file(const char* filename, /* opened, decoded, then closed */
+                   struct bitmap *bm,
+                   int maxsize,
+                   int format,
+                   const struct custom_format *cformat);
+
+int read_jpeg_fd(int fd, /* left open; the caller closes it */
+                 struct bitmap *bm, /* bm->data is output and work buffer */
+                 int maxsize, /* bytes available at bm->data */
+                 int format, /* FORMAT_RESIZE, FORMAT_DITHER, ... flags */
+                 const struct custom_format *cformat); /* may be NULL */
+
+#endif /* _JPEG_LOAD_H */