/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
* $Id$
*
* Greyscale framework
* Core & miscellaneous functions
*
* This is a generic framework to display up to 33 shades of grey
* on low-depth bitmap LCDs (Archos b&w, Iriver 4-grey, iPod 4-grey)
* within plugins.
*
* Copyright (C) 2004-2006 Jens Arnold
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "plugin.h"
#ifdef HAVE_LCD_BITMAP
#include "gray.h"
#if defined(CPU_PP) && defined(HAVE_ADJUSTABLE_CPU_FREQ)
#define NEED_BOOST
#endif
/* Global variables */
struct plugin_api *_gray_rb = NULL; /* global api struct pointer */
struct _gray_info _gray_info; /* global info structure */
#ifndef SIMULATOR
short _gray_random_buffer; /* state of the pseudo-random number generator */
#if CONFIG_LCD == LCD_SSD1815
/* measured and interpolated curve */
static const unsigned char lcdlinear[256] = {
0, 3, 5, 8, 11, 13, 16, 18,
21, 23, 26, 28, 31, 33, 36, 38,
40, 42, 45, 47, 49, 51, 53, 55,
57, 59, 60, 62, 64, 66, 67, 69,
70, 72, 73, 74, 76, 77, 78, 79,
81, 82, 83, 84, 85, 86, 87, 88,
88, 89, 90, 91, 92, 92, 93, 94,
95, 95, 96, 97, 97, 98, 99, 99,
100, 101, 102, 102, 103, 104, 104, 105,
106, 106, 107, 107, 108, 109, 109, 110,
111, 111, 112, 113, 113, 114, 114, 115,
116, 116, 117, 117, 118, 119, 119, 120,
120, 121, 121, 122, 122, 123, 123, 124,
124, 125, 125, 126, 126, 127, 127, 128,
128, 128, 129, 129, 130, 130, 131, 131,
132, 132, 133, 133, 133, 134, 134, 135,
135, 136, 136, 137, 137, 138, 138, 138,
139, 139, 140, 140, 141, 141, 142, 142,
143, 143, 144, 144, 145, 145, 146, 146,
147, 147, 148, 148, 148, 149, 149, 150,
150, 151, 151, 152, 152, 153, 153, 153,
154, 154, 155, 155, 156, 156, 157, 157,
158, 158, 158, 159, 159, 160, 160, 161,
161, 162, 162, 163, 163, 164, 164, 165,
165, 166, 167, 167, 168, 168, 169, 169,
170, 171, 171, 172, 173, 173, 174, 175,
176, 176, 177, 178, 179, 180, 181, 181,
182, 183, 184, 185, 186, 188, 189, 190,
191, 192, 194, 195, 196, 198, 199, 201,
202, 204, 205, 207, 209, 211, 213, 215,
217, 219, 222, 224, 226, 229, 231, 234,
236, 239, 242, 244, 247, 250, 252, 255
};
#elif CONFIG_LCD == LCD_S1D15E06
/* measured and interpolated curve */
static const unsigned char lcdlinear[256] = {
0, 5, 11, 16, 21, 27, 32, 37,
42, 47, 51, 56, 60, 64, 68, 72,
75, 78, 81, 84, 87, 89, 91, 93,
95, 96, 98, 99, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 111,
112, 113, 113, 114, 115, 115, 116, 117,
117, 118, 118, 119, 119, 120, 120, 121,
121, 122, 122, 123, 123, 124, 124, 125,
125, 126, 126, 127, 127, 127, 128, 128,
129, 129, 130, 130, 131, 131, 132, 132,
133, 133, 134, 134, 135, 135, 136, 136,
137, 137, 138, 138, 138, 139, 139, 140,
140, 141, 141, 141, 142, 142, 143, 143,
143, 144, 144, 145, 145, 145, 146, 146,
146, 147, 147, 147, 148, 148, 149, 149,
149, 150, 150, 150, 151, 151, 151, 152,
152, 153, 153, 153, 154, 154, 155, 155,
155, 156, 156, 157, 157, 157, 158, 158,
159, 159, 159, 160, 160, 161, 161, 162,
162, 162, 163, 163, 164, 164, 164, 165,
165, 166, 166, 167, 167, 167, 168, 168,
169, 169, 170, 170, 170, 171, 171, 172,
172, 173, 173, 174, 174, 175, 175, 176,
176, 177, 177, 178, 178, 179, 179, 180,
180, 181, 182, 182, 183, 184, 184, 185,
186, 186, 187, 188, 188, 189, 190, 191,
191, 192, 193, 194, 195, 196, 196, 197,
198, 199, 200, 201, 202, 203, 204, 205,
206, 207, 208, 209, 210, 211, 213, 214,
215, 216, 218, 219, 220, 222, 223, 225,
227, 228, 230, 232, 233, 235, 237, 239,
241, 243, 245, 247, 249, 251, 253, 255
};
#elif (CONFIG_LCD == LCD_IPOD2BPP) || (CONFIG_LCD == LCD_IPODMINI)
/* measured and interpolated curve for mini LCD */
/* TODO: verify this curve on the fullsize greyscale LCD */
static const unsigned char lcdlinear[256] = {
0, 3, 6, 8, 11, 14, 17, 19,
22, 24, 27, 29, 32, 34, 36, 38,
40, 42, 44, 45, 47, 48, 50, 51,
52, 54, 55, 56, 57, 58, 58, 59,
60, 61, 62, 62, 63, 64, 64, 65,
66, 66, 67, 67, 68, 68, 69, 69,
70, 70, 70, 71, 71, 71, 72, 72,
73, 73, 73, 74, 74, 74, 74, 75,
75, 75, 76, 76, 76, 77, 77, 77,
78, 78, 78, 79, 79, 79, 80, 80,
80, 80, 81, 81, 81, 82, 82, 82,
83, 83, 83, 84, 84, 84, 85, 85,
85, 85, 86, 86, 86, 87, 87, 87,
87, 88, 88, 88, 89, 89, 89, 89,
90, 90, 90, 91, 91, 91, 92, 92,
92, 93, 93, 93, 94, 94, 94, 95,
95, 96, 96, 96, 97, 97, 98, 98,
99, 99, 99, 100, 100, 101, 101, 102,
102, 103, 103, 104, 104, 105, 105, 106,
106, 107, 107, 108, 108, 109, 109, 110,
110, 111, 111, 112, 113, 113, 114, 114,
115, 115, 116, 117, 117, 118, 118, 119,
120, 120, 121, 122, 122, 123, 124, 124,
125, 126, 126, 127, 128, 128, 129, 130,
131, 131, 132, 133, 134, 134, 135, 136,
137, 138, 139, 140, 141, 142, 143, 144,
145, 146, 147, 148, 149, 150, 152, 153,
154, 156, 157, 159, 160, 162, 163, 165,
167, 168, 170, 172, 174, 176, 178, 180,
182, 184, 187, 189, 192, 194, 197, 200,
203, 206, 209, 212, 215, 219, 222, 226,
229, 233, 236, 240, 244, 248, 251, 255
};
#endif
#else /* SIMULATOR */
/* undo a (generic) PC display gamma of 2.0 to simulate target behaviour */
static const unsigned char lcdlinear[256] = {
0, 16, 23, 28, 32, 36, 39, 42,
45, 48, 50, 53, 55, 58, 60, 62,
64, 66, 68, 70, 71, 73, 75, 77,
78, 80, 81, 83, 84, 86, 87, 89,
90, 92, 93, 94, 96, 97, 98, 100,
101, 102, 103, 105, 106, 107, 108, 109,
111, 112, 113, 114, 115, 116, 117, 118,
119, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135,
135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 145, 145, 146, 147, 148, 149,
150, 151, 151, 152, 153, 154, 155, 156,
156, 157, 158, 159, 160, 160, 161, 162,
163, 164, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174,
175, 176, 176, 177, 178, 179, 179, 180,
181, 181, 182, 183, 183, 184, 185, 186,
186, 187, 188, 188, 189, 190, 190, 191,
192, 192, 193, 194, 194, 195, 196, 196,
197, 198, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206,
207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216,
217, 217, 218, 218, 219, 220, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225,
226, 226, 227, 228, 228, 229, 229, 230,
230, 231, 231, 232, 233, 233, 234, 234,
235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243,
243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251,
251, 252, 252, 253, 253, 254, 254, 255
};
#endif /* SIMULATOR */
/* Prototypes */
static inline void _deferred_update(void) __attribute__ ((always_inline));
static int exp_s16p16(int x);
static int log_s16p16(int x);
static void gray_screendump_hook(int fd);
#ifdef SIMULATOR
static unsigned long _gray_get_pixel(int x, int y);
#else
static void _timer_isr(void);
#endif
/* Update LCD areas not covered by the greyscale overlay */
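/* The four strips refreshed around the overlay (top and bottom span the
   full LCD width, left and right only the overlay's height):

       +-----------------------+
       |          top          |
       +------+---------+------+
       | left | overlay | right|
       +------+---------+------+
       |        bottom         |
       +-----------------------+
*/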
static inline void _deferred_update(void)
{
int x1 = MAX(_gray_info.x, 0);
int x2 = MIN(_gray_info.x + _gray_info.width, LCD_WIDTH);
int y1 = MAX(_gray_info.y, 0);
int y2 = MIN(_gray_info.y + _gray_info.height, LCD_HEIGHT);
if (y1 > 0) /* refresh part above overlay, full width */
_gray_rb->lcd_update_rect(0, 0, LCD_WIDTH, y1);
if (y2 < LCD_HEIGHT) /* refresh part below overlay, full width */
_gray_rb->lcd_update_rect(0, y2, LCD_WIDTH, LCD_HEIGHT - y2);
if (x1 > 0) /* refresh part to the left of overlay */
_gray_rb->lcd_update_rect(0, y1, x1, y2 - y1);
if (x2 < LCD_WIDTH) /* refresh part to the right of overlay */
_gray_rb->lcd_update_rect(x2, y1, LCD_WIDTH - x2, y2 - y1);
}
#ifndef SIMULATOR
/* Timer interrupt handler: display the next bitplane. Cycling rapidly
   through the bitplanes lets the eye average the per-plane black/white
   states of each pixel into an intermediate shade of grey. */
static void _timer_isr(void)
{
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
_gray_rb->lcd_blit(_gray_info.plane_data + MULU16(_gray_info.plane_size,
_gray_info.cur_plane), _gray_info.bx, _gray_info.y,
_gray_info.bwidth, _gray_info.height, _gray_info.bwidth);
#else
_gray_rb->lcd_blit(_gray_info.plane_data + MULU16(_gray_info.plane_size,
_gray_info.cur_plane), _gray_info.x, _gray_info.by,
_gray_info.width, _gray_info.bheight, _gray_info.width);
#endif
if (++_gray_info.cur_plane >= _gray_info.depth)
_gray_info.cur_plane = 0;
if (_gray_info.flags & _GRAY_DEFERRED_UPDATE) /* lcd_update() requested? */
{
_deferred_update();
_gray_info.flags &= ~_GRAY_DEFERRED_UPDATE; /* clear request */
}
}
#endif /* !SIMULATOR */
/* Fixed point exp() for s16p16 values, using shift-and-add: the constants
   are ln(2^k) resp. ln(1 + 2^-k) in s16p16 */
static int exp_s16p16(int x)
{
int t;
int y = 0x00010000;
if (x < 0) x += 0xb1721, y >>= 16;
t = x - 0x58b91; if (t >= 0) x = t, y <<= 8;
t = x - 0x2c5c8; if (t >= 0) x = t, y <<= 4;
t = x - 0x162e4; if (t >= 0) x = t, y <<= 2;
t = x - 0x0b172; if (t >= 0) x = t, y <<= 1;
t = x - 0x067cd; if (t >= 0) x = t, y += y >> 1;
t = x - 0x03920; if (t >= 0) x = t, y += y >> 2;
t = x - 0x01e27; if (t >= 0) x = t, y += y >> 3;
t = x - 0x00f85; if (t >= 0) x = t, y += y >> 4;
t = x - 0x007e1; if (t >= 0) x = t, y += y >> 5;
t = x - 0x003f8; if (t >= 0) x = t, y += y >> 6;
t = x - 0x001fe; if (t >= 0) x = t, y += y >> 7;
y += ((y >> 8) * x) >> 8;
return y;
}
/* Fixed point natural log() for s16p16 values, implemented as the inverse
   of the shift-and-add scheme above */
static int log_s16p16(int x)
{
int t;
int y = 0xa65af;
if (x < 0x00008000) x <<=16, y -= 0xb1721;
if (x < 0x00800000) x <<= 8, y -= 0x58b91;
if (x < 0x08000000) x <<= 4, y -= 0x2c5c8;
if (x < 0x20000000) x <<= 2, y -= 0x162e4;
if (x < 0x40000000) x <<= 1, y -= 0x0b172;
t = x + (x >> 1); if ((t & 0x80000000) == 0) x = t, y -= 0x067cd;
t = x + (x >> 2); if ((t & 0x80000000) == 0) x = t, y -= 0x03920;
t = x + (x >> 3); if ((t & 0x80000000) == 0) x = t, y -= 0x01e27;
t = x + (x >> 4); if ((t & 0x80000000) == 0) x = t, y -= 0x00f85;
t = x + (x >> 5); if ((t & 0x80000000) == 0) x = t, y -= 0x007e1;
t = x + (x >> 6); if ((t & 0x80000000) == 0) x = t, y -= 0x003f8;
t = x + (x >> 7); if ((t & 0x80000000) == 0) x = t, y -= 0x001fe;
x = 0x80000000 - x;
y -= x >> 15;
return y;
}
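/* gray_init() below combines these two helpers to evaluate the gamma power
   function purely in fixed point, using x^g == exp(g * log(x)). With g in
   s8p8 and log() returning s16p16, the product is s24p24, so a right shift
   by 8 yields the s16p16 value exp() expects. Spot checks: exp_s16p16(0)
   == 0x10000 (1.0), and exp_s16p16(0x10000) is approximately 0x2b7e1,
   i.e. e in s16p16. */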
/* Initialise the framework and prepare the greyscale display buffer
arguments:
newrb = pointer to plugin api
gbuf = pointer to the memory area to use (e.g. plugin buffer)
gbuf_size = max usable size of the buffer
buffered = use chunky pixel buffering with delta buffer?
This allows the use of all drawing functions, but needs
more memory. Unbuffered operation provides only a subset of
the drawing functions (gray_bitmap drawing and scrolling).
width = width in pixels (1..LCD_WIDTH)
height = height in pixels (1..LCD_HEIGHT)
Note that depending on the target LCD, either height or
width is rounded up to a multiple of 8.
depth = number of bitplanes to use (1..32).
gamma = gamma value as s8p8 fixed point. gamma <= 0 means no
correction at all, i.e. no LCD linearisation either.
result:
= depth if there was enough memory
< depth if there wasn't enough memory. The number of displayable
shades is smaller than desired, but it still works
= 0 if there wasn't even enough memory for 1 bitplane
You can request any depth in the allowed range, not just powers of 2. The
routine performs "graceful degradation" if the memory is not sufficient for
the desired depth. As long as there is at least enough memory for 1 bitplane,
it creates as many bitplanes as fit into memory, although 1 bitplane won't
deliver an enhancement over the native display.
The number of displayable shades is calculated as follows:
shades = depth + 1
If you need info about the memory taken by the greyscale buffer, supply a
long* as the last parameter. This long will then contain the number of bytes
used. The total memory needed can be calculated as follows:
total_mem =
    shades * sizeof(long)                      (bitpatterns)
  + (horizontal_packing ?
       ((width + 7) / 8) * height * depth :
       width * ((height + 7) / 8) * depth)     (bitplane data)
  + (buffered ? width * height * 2 : 0)        (chunky front- & backbuffer)
  + 0..3                                       (longword alignment)
The function tries to be as authentic as possible regarding memory usage on
the simulator, even if it doesn't use all of the allocated memory. There's
one situation where it will consume more memory on the sim than on the
target: if you're allocating a low depth (< 8) without buffering. */
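/* Worked example of the above (plain arithmetic, not code): a 112x64 LCD
   with vertical packing, buffered operation and depth = 16 needs
   17 * 4 = 68 bytes (bitpatterns, shades = 17)
   + 112 * (64/8) * 16 = 14336 bytes (bitplane data)
   + 112 * 64 * 2 = 14336 bytes (chunky front- & backbuffer)
   + 0..3 bytes alignment = about 28740 bytes in total. */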
int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
bool buffered, int width, int height, int depth, int gamma,
long *buf_taken)
{
int possible_depth, bdim, i;
long plane_size, buftaken;
unsigned data;
#ifndef SIMULATOR
int j, bitfill;
#endif
_gray_rb = newrb;
if ((unsigned) width > LCD_WIDTH
|| (unsigned) height > LCD_HEIGHT
|| depth < 1)
return 0;
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
bdim = (width + 7) >> 3;
width = bdim << 3;
#else
bdim = (height + 7) >> 3;
height = bdim << 3;
#endif
/* the buffer has to be long aligned */
buftaken = (-(long)gbuf) & 3;
gbuf += buftaken;
/* chunky front- & backbuffer */
if (buffered)
{
plane_size = MULU16(width, height);
buftaken += 2 * plane_size;
if (buftaken > gbuf_size)
return 0;
_gray_info.cur_buffer = gbuf;
gbuf += plane_size;
/* set backbuffer to 0xFF to guarantee the initial full update */
_gray_rb->memset(gbuf, 0xFF, plane_size);
_gray_info.back_buffer = gbuf;
gbuf += plane_size;
}
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
plane_size = MULU16(bdim, height);
#else
plane_size = MULU16(width, bdim);
#endif
possible_depth = (gbuf_size - buftaken - sizeof(long))
/ (plane_size + sizeof(long));
if (possible_depth < 1)
return 0;
depth = MIN(depth, 32);
depth = MIN(depth, possible_depth);
#ifdef SIMULATOR
if (!buffered)
{
long orig_size = MULU16(depth, plane_size) + (depth + 1) * sizeof(long);
plane_size = MULU16(width, height);
if (plane_size > orig_size)
{
buftaken += plane_size;
if (buftaken > gbuf_size)
return 0;
}
else
{
buftaken += orig_size;
}
_gray_info.cur_buffer = gbuf;
}
else
#endif
buftaken += MULU16(depth, plane_size) + (depth + 1) * sizeof(long);
_gray_info.x = 0;
_gray_info.y = 0;
_gray_info.width = width;
_gray_info.height = height;
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
_gray_info.bx = 0;
_gray_info.bwidth = bdim;
#else
_gray_info.by = 0;
_gray_info.bheight = bdim;
#endif
_gray_info.depth = depth;
_gray_info.flags = 0;
#ifndef SIMULATOR
_gray_info.cur_plane = 0;
_gray_info.plane_size = plane_size;
_gray_info.plane_data = gbuf;
_gray_rb->memset(gbuf, 0, MULU16(depth, plane_size));
gbuf += MULU16(depth, plane_size);
_gray_info.bitpattern = (unsigned long *)gbuf;
/* randmask is the smallest (2^n - 1) >= depth - 1; the PRNG output is
   masked with it and then folded into the range 0..depth-1 */
i = depth - 1;
j = 8;
while (i != 0)
{
i >>= 1;
j--;
}
_gray_info.randmask = 0xFFu >> j;
bitfill = (-depth) & 7; /* pad bits so each pattern ends on a byte boundary */
/* Precalculate the bit patterns for all possible pixel values */
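/* Example (depth = 3): the loop below yields the patterns 0b111, 0b110,
   0b100 and 0b000 for i = 0..3. Each bit selects one bitplane; a set bit
   drives the pixel black in that plane, so pixel value i is black during
   depth - i of the depth plane phases, i.e. brighter for larger i. */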
for (i = 0; i <= depth; i++)
{
unsigned long pattern = 0;
int value = 0;
for (j = 0; j < depth; j++)
{
pattern <<= 1;
value += i;
if (value >= depth)
value -= depth; /* "white" bit */
else
pattern |= 1; /* "black" bit */
}
/* now the lower <depth> bits contain the pattern */
_gray_info.bitpattern[i] = pattern << bitfill;
}
#endif
/* precalculate the value -> pattern index conversion table, taking
linearisation and gamma correction into account */
if (gamma <= 0)
{
for (i = 0; i < 256; i++)
{
data = MULU16(depth, i) + 127;
_gray_info.idxtable[i] = (data + (data >> 8)) >> 8;
/* approx. data / 255 */
}
}
else
{
for (i = 0; i < 256; i++)
{
data = exp_s16p16((gamma * log_s16p16(i * 257 + 1)) >> 8) + 128;
data = (data - (data >> 8)) >> 8; /* approx. data /= 257 */
data = MULU16(depth, lcdlinear[data]) + 127;
_gray_info.idxtable[i] = (data + (data >> 8)) >> 8;
/* approx. data / 255 */
}
}
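/* The (data + (data >> 8)) >> 8 step above is a rounding division by 255
   without an actual divide: e.g. for depth = 32 and i = 128, data becomes
   32 * 128 + 127 = 4223, and (4223 + 16) >> 8 = 16, which matches
   32 * 128 / 255 rounded to the nearest integer. */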
_gray_info.fg_index = 0;
_gray_info.bg_index = depth;
_gray_info.fg_brightness = 0;
_gray_info.bg_brightness = 255;
_gray_info.drawmode = DRMODE_SOLID;
_gray_info.curfont = FONT_SYSFIXED;
if (buf_taken) /* caller requested info about space taken */
*buf_taken = buftaken;
return depth;
}
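/* A minimal usage sketch (hypothetical plugin code, not part of this file;
   how the buffer is obtained depends on the plugin API version):

   unsigned char *gbuf = ...;   // e.g. from rb->plugin_get_buffer()
   long gbuf_size = ...;
   int depth = gray_init(rb, gbuf, gbuf_size, true,
                         LCD_WIDTH, LCD_HEIGHT, 16, -1, NULL);
   if (depth <= 1)              // 1 bitplane is no better than native b&w
       return PLUGIN_ERROR;
   gray_show(true);             // start displaying the overlay
   // ... draw via the gray_xxx() functions, call gray_update() ...
   gray_release();              // stop the overlay and release the buffer
*/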
/* Release the greyscale display buffer and the library
DO CALL either this function or at least gray_show(false)
before you exit, otherwise nasty things may happen. */
void gray_release(void)
{
gray_show(false);
}
/* Switch the greyscale overlay on or off
DO NOT call lcd_update() or any other api function that directly accesses
the lcd while the greyscale overlay is running! If you need to do
lcd_update() to update something outside the greyscale overlay area, use
gray_deferred_update() instead.
Other functions to avoid are:
lcd_blit() (obviously), lcd_update_rect(), lcd_set_contrast(),
lcd_set_invert_display(), lcd_set_flip(), lcd_roll() */
void gray_show(bool enable)
{
if (enable && !(_gray_info.flags & _GRAY_RUNNING))
{
_gray_info.flags |= _GRAY_RUNNING;
#ifdef SIMULATOR
_gray_rb->sim_lcd_ex_init(_gray_info.depth + 1, _gray_get_pixel);
gray_update();
#else /* !SIMULATOR */
#ifdef NEED_BOOST
_gray_rb->cpu_boost(true);
#endif
#if CONFIG_LCD == LCD_SSD1815
_gray_rb->timer_register(1, NULL, TIMER_FREQ / 67, 1, _timer_isr);
#elif CONFIG_LCD == LCD_S1D15E06
_gray_rb->timer_register(1, NULL, TIMER_FREQ / 70, 1, _timer_isr);
#elif CONFIG_LCD == LCD_IPOD2BPP
/* FIXME: verify value */
_gray_rb->timer_register(1, NULL, TIMER_FREQ / 80, 1, _timer_isr);
#elif CONFIG_LCD == LCD_IPODMINI
_gray_rb->timer_register(1, NULL, TIMER_FREQ / 88, 1, _timer_isr);
#elif CONFIG_LCD == LCD_IFP7XX
(void)_timer_isr; /* TODO: implement for iFP */
#endif /* CONFIG_LCD */
#endif /* !SIMULATOR */
_gray_rb->screen_dump_set_hook(gray_screendump_hook);
}
else if (!enable && (_gray_info.flags & _GRAY_RUNNING))
{
#ifdef SIMULATOR
_gray_rb->sim_lcd_ex_init(0, NULL);
#else
_gray_rb->timer_unregister();
#ifdef NEED_BOOST
_gray_rb->cpu_boost(false);
#endif
#endif
_gray_info.flags &= ~_GRAY_RUNNING;
_gray_rb->screen_dump_set_hook(NULL);
_gray_rb->lcd_update(); /* restore whatever there was before */
}
}
#ifdef SIMULATOR
/* Callback function for gray_update_rect() to read a pixel from the graybuffer.
Note that x and y are in LCD coordinates, not graybuffer coordinates! */
static unsigned long _gray_get_pixel(int x, int y)
{
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
return _gray_info.cur_buffer[MULU16(y - _gray_info.y, _gray_info.width)
+ x - _gray_info.x]
+ (1 << LCD_DEPTH);
#else
return _gray_info.cur_buffer[MULU16(x - _gray_info.x, _gray_info.height)
+ y - _gray_info.y]
+ (1 << LCD_DEPTH);
#endif
}
/* Update a rectangular area of the greyscale overlay */
void gray_update_rect(int x, int y, int width, int height)
{
if (x + width > _gray_info.width)
width = _gray_info.width - x;
if (y + height > _gray_info.height)
height = _gray_info.height - y;
x += _gray_info.x;
y += _gray_info.y;
if (x + width > LCD_WIDTH)
width = LCD_WIDTH - x;
if (y + height > LCD_HEIGHT)
height = LCD_HEIGHT - y;
_gray_rb->sim_lcd_ex_update_rect(x, y, width, height);
}
#else /* !SIMULATOR */
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
/* Update a rectangular area of the greyscale overlay */
void gray_update_rect(int x, int y, int width, int height)
{
int xmax, bwidth;
long srcofs;
unsigned char *dst;
if ((width <= 0) || (height <= 0))
return; /* nothing to do */
/* The X coordinates have to work on whole pixel block columns */
xmax = (x + width - 1) >> 3;
x >>= 3;
if (y + height > _gray_info.height)
height = _gray_info.height - y;
if (xmax >= _gray_info.bwidth)
xmax = _gray_info.bwidth - 1;
bwidth = xmax - x + 1;
srcofs = MULU16(_gray_info.width, y) + (x << 3);
dst = _gray_info.plane_data + MULU16(_gray_info.bwidth, y) + x;
/* Copy specified rectangle bitmap to hardware */
for (; height > 0; height--)
{
long srcofs_row = srcofs;
unsigned char *dst_row = dst;
unsigned char *dst_end = dst_row + bwidth;
do
{
unsigned long pat_stack[8];
unsigned long *pat_ptr;
unsigned char *cbuf, *bbuf;
unsigned change;
cbuf = _gray_info.cur_buffer + srcofs_row;
bbuf = _gray_info.back_buffer + srcofs_row;
#ifdef CPU_ARM
asm volatile
(
"ldr r0, [%[cbuf]] \n"
"ldr r1, [%[bbuf]] \n"
"eor r1, r0, r1 \n"
"ldr r0, [%[cbuf], #4] \n"
"ldr %[chg], [%[bbuf], #4] \n"
"eor %[chg], r0, %[chg] \n"
"orr %[chg], %[chg], r1 \n"
: /* outputs */
[chg] "=&r"(change)
: /* inputs */
[cbuf]"r"(cbuf),
[bbuf]"r"(bbuf)
: /* clobbers */
"r0", "r1"
);
if (change != 0)
{
unsigned char *addr;
unsigned mask, depth, trash;
pat_ptr = &pat_stack[8];
/* precalculate the bit patterns with random shifts
* for all 8 pixels and put them on an extra "stack" */
asm volatile
(
"mov r3, #8 \n" /* loop count */
"mov %[mask], #0 \n"
".ur_pre_loop: \n"
"mov %[mask], %[mask], lsl #1 \n" /* shift mask */
"ldrb r0, [%[cbuf]], #1 \n" /* read current buffer */
"ldrb r1, [%[bbuf]] \n" /* read back buffer */
"strb r0, [%[bbuf]], #1 \n" /* update back buffer */
"mov r2, #0 \n" /* preset for skipped pixel */
"cmp r0, r1 \n" /* no change? */
"beq .ur_skip \n" /* -> skip */
"ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
"add %[rnd], %[rnd], %[rnd], lsl #2 \n" /* multiply by 75 */
"rsb %[rnd], %[rnd], %[rnd], lsl #4 \n"
"add %[rnd], %[rnd], #74 \n" /* add another 74 */
/* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
"and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
"cmp r1, %[dpth] \n" /* random >= depth ? */
"subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
"mov r0, r2, lsl r1 \n" /** rotate pattern **/
"sub r1, %[dpth], r1 \n"
"orr r2, r0, r2, lsr r1 \n"
"orr %[mask], %[mask], #1 \n" /* set mask bit */
".ur_skip: \n"
"str r2, [%[patp], #-4]! \n" /* push on pattern stack */
"subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
"bne .ur_pre_loop \n"
: /* outputs */
[cbuf]"+r"(cbuf),
[bbuf]"+r"(bbuf),
[patp]"+r"(pat_ptr),
[rnd] "+r"(_gray_random_buffer),
[mask]"=&r"(mask)
: /* inputs */
[bpat]"r"(_gray_info.bitpattern),
[dpth]"r"(_gray_info.depth),
[rmsk]"r"(_gray_info.randmask)
: /* clobbers */
"r0", "r1", "r2", "r3"
);
addr = dst_row;
depth = _gray_info.depth;
/* set the bits for all 8 pixels in all bytes according to the
* precalculated patterns on the pattern stack */
asm volatile
(
"ldmia %[patp], {r1 - r8} \n" /* pop all 8 patterns */
/** Rotate the four 8x8 bit "blocks" within r1..r8 **/
"mov %[rx], #0xF0 \n" /** Stage 1: 4 bit "comb" **/
"orr %[rx], %[rx], %[rx], lsl #8 \n"
"orr %[rx], %[rx], %[rx], lsl #16\n" /* bitmask = ...11110000 */
"eor r0, r1, r5, lsl #4 \n"
"and r0, r0, %[rx] \n"
"eor r1, r1, r0 \n" /* r1 = ...e3e2e1e0a3a2a1a0 */
"eor r5, r5, r0, lsr #4 \n" /* r5 = ...e7e6e5e4a7a6a5a4 */
"eor r0, r2, r6, lsl #4 \n"
"and r0, r0, %[rx] \n"
"eor r2, r2, r0 \n" /* r2 = ...f3f2f1f0b3b2b1b0 */
"eor r6, r6, r0, lsr #4 \n" /* r6 = ...f7f6f5f4f7f6f5f4 */
"eor r0, r3, r7, lsl #4 \n"
"and r0, r0, %[rx] \n"
"eor r3, r3, r0 \n" /* r3 = ...g3g2g1g0c3c2c1c0 */
"eor r7, r7, r0, lsr #4 \n" /* r7 = ...g7g6g5g4c7c6c5c4 */
"eor r0, r4, r8, lsl #4 \n"
"and r0, r0, %[rx] \n"
"eor r4, r4, r0 \n" /* r4 = ...h3h2h1h0d3d2d1d0 */
"eor r8, r8, r0, lsr #4 \n" /* r8 = ...h7h6h5h4d7d6d5d4 */
"mov %[rx], #0xCC \n" /** Stage 2: 2 bit "comb" **/
"orr %[rx], %[rx], %[rx], lsl #8 \n"
"orr %[rx], %[rx], %[rx], lsl #16\n" /* bitmask = ...11001100 */
"eor r0, r1, r3, lsl #2 \n"
"and r0, r0, %[rx] \n"
"eor r1, r1, r0 \n" /* r1 = ...g1g0e1e0c1c0a1a0 */
"eor r3, r3, r0, lsr #2 \n" /* r3 = ...g3g2e3e2c3c2a3a2 */
"eor r0, r2, r4, lsl #2 \n"
"and r0, r0, %[rx] \n"
"eor r2, r2, r0 \n" /* r2 = ...h1h0f1f0d1d0b1b0 */
"eor r4, r4, r0, lsr #2 \n" /* r4 = ...h3h2f3f2d3d2b3b2 */
"eor r0, r5, r7, lsl #2 \n"
"and r0, r0, %[rx] \n"
"eor r5, r5, r0 \n" /* r5 = ...g5g4e5e4c5c4a5a4 */
"eor r7, r7, r0, lsr #2 \n" /* r7 = ...g7g6e7e6c7c6a7a6 */
"eor r0, r6, r8, lsl #2 \n"
"and r0, r0, %[rx] \n"
"eor r6, r6, r0 \n" /* r6 = ...h5h4f5f4d5d4b5b4 */
"eor r8, r8, r0, lsr #2 \n" /* r8 = ...h7h6f7f6d7d6b7b6 */
"mov %[rx], #0xAA \n" /** Stage 3: 1 bit "comb" **/
"orr %[rx], %[rx], %[rx], lsl #8 \n"
"orr %[rx], %[rx], %[rx], lsl #16\n" /* bitmask = ...10101010 */
"eor r0, r1, r2, lsl #1 \n"
"and r0, r0, %[rx] \n"
"eor r1, r1, r0 \n" /* r1 = ...h0g0f0e0d0c0b0a0 */
"eor r2, r2, r0, lsr #1 \n" /* r2 = ...h1g1f1e1d1c1b1a1 */
"eor r0, r3, r4, lsl #1 \n"
"and r0, r0, %[rx] \n"
"eor r3, r3, r0 \n" /* r3 = ...h2g2f2e2d2c2b2a2 */
"eor r4, r4, r0, lsr #1 \n" /* r4 = ...h3g3f3e3d3c3b3a3 */
"eor r0, r5, r6, lsl #1 \n"
"and r0, r0, %[rx] \n"
"eor r5, r5, r0 \n" /* r5 = ...h4g4f4e4d4c4b4a4 */
"eor r6, r6, r0, lsr #1 \n" /* r6 = ...h5g5f5e5d5c5b5a5 */
"eor r0, r7, r8, lsl #1 \n"
"and r0, r0, %[rx] \n"
"eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
"eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
"sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/
"and r0, r0, #7 \n"
"add pc, pc, r0, lsl #2 \n" /* jump into shift streak */
"mov r8, r8, lsr #8 \n" /* r8: never reached */
"mov r7, r7, lsr #8 \n"
"mov r6, r6, lsr #8 \n"
"mov r5, r5, lsr #8 \n"
"mov r4, r4, lsr #8 \n"
"mov r3, r3, lsr #8 \n"
"mov r2, r2, lsr #8 \n"
"mov r1, r1, lsr #8 \n"
"mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
"ands %[mask], %[mask], #0xff \n"
"beq .ur_sstart \n" /* short loop if no bits to keep */
"ldrb r0, [pc, r0] \n" /* jump into full loop */
"add pc, pc, r0 \n"
".ur_ftable: \n"
".byte .ur_f1 - .ur_ftable - 4 \n" /* [jump tables are tricky] */
".byte .ur_f2 - .ur_ftable - 4 \n"
".byte .ur_f3 - .ur_ftable - 4 \n"
".byte .ur_f4 - .ur_ftable - 4 \n"
".byte .ur_f5 - .ur_ftable - 4 \n"
".byte .ur_f6 - .ur_ftable - 4 \n"
".byte .ur_f7 - .ur_ftable - 4 \n"
".byte .ur_f8 - .ur_ftable - 4 \n"
".ur_floop: \n" /** full loop (bits to keep)**/
".ur_f8: \n"
"ldrb r0, [%[addr]] \n" /* load old byte */
"and r0, r0, %[mask] \n" /* mask out replaced bits */
"orr r0, r0, r1 \n" /* set new bits */
"strb r0, [%[addr]], %[psiz] \n" /* store byte */
"mov r1, r1, lsr #8 \n" /* shift out used-up byte */
".ur_f7: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r2 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r2, r2, lsr #8 \n"
".ur_f6: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r3 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r3, r3, lsr #8 \n"
".ur_f5: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r4 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r4, r4, lsr #8 \n"
".ur_f4: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r5 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r5, r5, lsr #8 \n"
".ur_f3: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r6 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r6, r6, lsr #8 \n"
".ur_f2: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r7 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r7, r7, lsr #8 \n"
".ur_f1: \n"
"ldrb r0, [%[addr]] \n"
"and r0, r0, %[mask] \n"
"orr r0, r0, r8 \n"
"strb r0, [%[addr]], %[psiz] \n"
"mov r8, r8, lsr #8 \n"
"subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
"bhi .ur_floop \n"
"b .ur_end \n"
".ur_sstart: \n"
"ldrb r0, [pc, r0] \n" /* jump into short loop*/
"add pc, pc, r0 \n"
".ur_stable: \n"
".byte .ur_s1 - .ur_stable - 4 \n"
".byte .ur_s2 - .ur_stable - 4 \n"
".byte .ur_s3 - .ur_stable - 4 \n"
".byte .ur_s4 - .ur_stable - 4 \n"
".byte .ur_s5 - .ur_stable - 4 \n"
".byte .ur_s6 - .ur_stable - 4 \n"
".byte .ur_s7 - .ur_stable - 4 \n"
".byte .ur_s8 - .ur_stable - 4 \n"
".ur_sloop: \n" /** short loop (nothing to keep) **/
".ur_s8: \n"
"strb r1, [%[addr]], %[psiz] \n" /* store byte */
"mov r1, r1, lsr #8 \n" /* shift out used-up byte */
".ur_s7: \n"
"strb r2, [%[addr]], %[psiz] \n"
"mov r2, r2, lsr #8 \n"
".ur_s6: \n"
"strb r3, [%[addr]], %[psiz] \n"
"mov r3, r3, lsr #8 \n"
".ur_s5: \n"
"strb r4, [%[addr]], %[psiz] \n"
"mov r4, r4, lsr #8 \n"
".ur_s4: \n"
"strb r5, [%[addr]], %[psiz] \n"
"mov r5, r5, lsr #8 \n"
".ur_s3: \n"
"strb r6, [%[addr]], %[psiz] \n"
"mov r6, r6, lsr #8 \n"
".ur_s2: \n"
"strb r7, [%[addr]], %[psiz] \n"
"mov r7, r7, lsr #8 \n"
".ur_s1: \n"
"strb r8, [%[addr]], %[psiz] \n"
"mov r8, r8, lsr #8 \n"
"subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
"bhi .ur_sloop \n"
".ur_end: \n"
: /* outputs */
[addr]"+r"(addr),
[mask]"+r"(mask),
[dpth]"+r"(depth),
[rx] "=&r"(trash)
: /* inputs */
[psiz]"r"(_gray_info.plane_size),
[patp]"[rx]"(pat_ptr)
: /* clobbers */
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
);
}
#else /* C version, for reference */
#warning C version of gray_update_rect() used
(void)pat_ptr;
/* check whether anything changed in the 8-pixel block */
change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);
if (change != 0)
{
unsigned char *addr, *end;
unsigned mask = 0;
unsigned test = 1 << ((-_gray_info.depth) & 7);
int i;
/* precalculate the bit patterns with random shifts
* for all 8 pixels and put them on an extra "stack" */
for (i = 7; i >= 0; i--)
{
unsigned pat = 0;
unsigned char cur = *cbuf++;
unsigned char back = *bbuf;
*bbuf++ = cur;
mask <<= 1;
if (cur != back)
{
int shift;
pat = _gray_info.bitpattern[cur];
/* shift the pattern by a pseudo-random amount (simple & fast LCG) */
_gray_random_buffer = 75 * _gray_random_buffer + 74;
shift = (_gray_random_buffer >> 8) & _gray_info.randmask;
if (shift >= _gray_info.depth)
shift -= _gray_info.depth;
pat = (pat << shift) | (pat >> (_gray_info.depth - shift));
mask |= 1;
}
pat_stack[i] = pat;
}
addr = dst_row;
end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
/* set the bits for all 8 pixels in all bytes according to the
* precalculated patterns on the pattern stack */
mask = (~mask & 0xff);
if (mask == 0)
{
do
{
unsigned data = 0;
for (i = 7; i >= 0; i--)
data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);
*addr = data;
addr += _gray_info.plane_size;
test <<= 1;
}
while (addr < end);
}
else
{
do
{
unsigned data = 0;
for (i = 7; i >= 0; i--)
data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);
*addr = (*addr & mask) | data;
addr += _gray_info.plane_size;
test <<= 1;
}
while (addr < end);
}
}
#endif /* CPU_ARM */
srcofs_row += 8;
dst_row++;
}
while (dst_row < dst_end);
srcofs += _gray_info.width;
dst += _gray_info.bwidth;
}
}
#else /* LCD_PIXELFORMAT == VERTICAL_PACKING */
/* Update a rectangular area of the greyscale overlay */
void gray_update_rect(int x, int y, int width, int height)
{
int ymax;
long srcofs;
unsigned char *dst;
if ((width <= 0) || (height <= 0))
return; /* nothing to do */
/* The Y coordinates have to work on whole pixel block rows */
ymax = (y + height - 1) >> 3;
y >>= 3;
if (x + width > _gray_info.width)
width = _gray_info.width - x;
if (ymax >= _gray_info.bheight)
ymax = _gray_info.bheight - 1;
srcofs = (y << 3) + MULU16(_gray_info.height, x);
dst = _gray_info.plane_data + MULU16(_gray_info.width, y) + x;
/* Copy specified rectangle bitmap to hardware */
for (; y <= ymax; y++)
{
long srcofs_row = srcofs;
unsigned char *dst_row = dst;
unsigned char *dst_end = dst_row + width;
do
{
unsigned long pat_stack[8];
unsigned long *pat_ptr;
unsigned char *cbuf, *bbuf;
unsigned change;
cbuf = _gray_info.cur_buffer + srcofs_row;
bbuf = _gray_info.back_buffer + srcofs_row;
#if CONFIG_CPU == SH7034
asm volatile (
"mov.l @%[bbuf], r2 \n"
"mov.l @%[cbuf], r1 \n"
"mov.l @(4,%[bbuf]), %[chg]\n"
"xor r1, r2 \n"
"mov.l @(4,%[cbuf]), r1 \n"
"xor r1, %[chg] \n"
"or r2, %[chg] \n"
: /* outputs */
[chg] "=r"(change)
: /* inputs */
[cbuf]"r"(cbuf),
[bbuf]"r"(bbuf)
: /* clobbers */
"r1", "r2"
);
if (change != 0)
{
unsigned char *addr;
unsigned mask, depth, trash;
pat_ptr = &pat_stack[8];
/* precalculate the bit patterns with random shifts
* for all 8 pixels and put them on an extra "stack" */
asm volatile
(
"mov #8, r3 \n" /* loop count */
".ur_pre_loop: \n"
"mov.b @%[cbuf]+, r0 \n" /* read current buffer */
"mov.b @%[bbuf], r1 \n" /* read back buffer */
"mov #0, r2 \n" /* preset for skipped pixel */
"mov.b r0, @%[bbuf] \n" /* update back buffer */
"add #1, %[bbuf] \n"
"cmp/eq r0, r1 \n" /* no change? */
"bt .ur_skip \n" /* -> skip */
"mov #75, r1 \n"
"mulu r1, %[rnd] \n" /* multiply by 75 */
"shll2 r0 \n" /* pixel value -> pattern offset */
"mov.l @(r0,%[bpat]), r4 \n" /* r4 = bitpattern[byte]; */
"sts macl, %[rnd] \n"
"add #74, %[rnd] \n" /* add another 74 */
/* Since the lower bits are not very random: */
"swap.b %[rnd], r1 \n" /* get bits 8..15 (need max. 5) */
"and %[rmsk], r1 \n" /* mask out unneeded bits */
"cmp/hs %[dpth], r1 \n" /* random >= depth ? */
"bf .ur_ntrim \n"
"sub %[dpth], r1 \n" /* yes: random -= depth; */
".ur_ntrim: \n"
"mov.l .ashlsi3, r0 \n" /** rotate pattern **/
"jsr @r0 \n" /* r4 -> r0, shift left by r5 */
"mov r1, r5 \n"
"mov %[dpth], r5 \n"
"sub r1, r5 \n" /* r5 = depth - r1 */
"mov.l .lshrsi3, r1 \n"
"jsr @r1 \n" /* r4 -> r0, shift right by r5 */
"mov r0, r2 \n" /* store previous result in r2 */
"or r0, r2 \n" /* rotated_pattern = r2 | r0 */
"clrt \n" /* mask bit = 0 (replace) */
".ur_skip: \n" /* T == 1 if skipped */
"rotcr %[mask] \n" /* get mask bit */
"mov.l r2, @-%[patp] \n" /* push on pattern stack */
"add #-1, r3 \n" /* loop 8 times (pixel block) */
"cmp/pl r3 \n"
"bt .ur_pre_loop \n"
"shlr8 %[mask] \n" /* shift mask to low byte */
"shlr16 %[mask] \n"
: /* outputs */
[cbuf]"+r"(cbuf),
[bbuf]"+r"(bbuf),
[rnd] "+r"(_gray_random_buffer),
[patp]"+r"(pat_ptr),
[mask]"=&r"(mask)
: /* inputs */
[dpth]"r"(_gray_info.depth),
[bpat]"r"(_gray_info.bitpattern),
[rmsk]"r"(_gray_info.randmask)
: /* clobbers */
"r0", "r1", "r2", "r3", "r4", "r5", "macl", "pr"
);
addr = dst_row;
depth = _gray_info.depth;
/* set the bits for all 8 pixels in all bytes according to the
* precalculated patterns on the pattern stack */
asm volatile
(
"mov.l @%[patp]+, r8 \n" /* pop all 8 patterns */
"mov.l @%[patp]+, r7 \n"
"mov.l @%[patp]+, r6 \n"
"mov.l @%[patp]+, r5 \n"
"mov.l @%[patp]+, r4 \n"
"mov.l @%[patp]+, r3 \n"
"mov.l @%[patp]+, r2 \n"
"mov.l @%[patp], r1 \n"
/** Rotate the four 8x8 bit "blocks" within r1..r8 **/
"mov.l .ur_mask4, %[rx] \n" /* bitmask = ...11110000 */
"mov r5, r0 \n" /** Stage 1: 4 bit "comb" **/
"shll2 r0 \n"
"shll2 r0 \n"
"xor r1, r0 \n"
"and %[rx], r0 \n"
"xor r0, r1 \n" /* r1 = ...e3e2e1e0a3a2a1a0 */
"shlr2 r0 \n"
"shlr2 r0 \n"
"xor r0, r5 \n" /* r5 = ...e7e6e5e4a7a6a5a4 */
"mov r6, r0 \n"
"shll2 r0 \n"
"shll2 r0 \n"
"xor r2, r0 \n"
"and %[rx], r0 \n"
"xor r0, r2 \n" /* r2 = ...f3f2f1f0b3b2b1b0 */
"shlr2 r0 \n"
"shlr2 r0 \n"
"xor r0, r6 \n" /* r6 = ...f7f6f5f4f7f6f5f4 */
"mov r7, r0 \n"
"shll2 r0 \n"
"shll2 r0 \n"
"xor r3, r0 \n"
"and %[rx], r0 \n"
"xor r0, r3 \n" /* r3 = ...g3g2g1g0c3c2c1c0 */
"shlr2 r0 \n"
"shlr2 r0 \n"
"xor r0, r7 \n" /* r7 = ...g7g6g5g4c7c6c5c4 */
"mov r8, r0 \n"
"shll2 r0 \n"
"shll2 r0 \n"
"xor r4, r0 \n"
"and %[rx], r0 \n"
"xor r0, r4 \n" /* r4 = ...h3h2h1h0d3d2d1d0 */
"shlr2 r0 \n"
"shlr2 r0 \n"
"xor r0, r8 \n" /* r8 = ...h7h6h5h4d7d6d5d4 */
"mov.l .ur_mask2, %[rx] \n" /* bitmask = ...11001100 */
"mov r3, r0 \n" /** Stage 2: 2 bit "comb" **/
"shll2 r0 \n"
"xor r1, r0 \n"
"and %[rx], r0 \n"
"xor r0, r1 \n" /* r1 = ...g1g0e1e0c1c0a1a0 */
"shlr2 r0 \n"
"xor r0, r3 \n" /* r3 = ...g3g2e3e2c3c2a3a2 */
"mov r4, r0 \n"
"shll2 r0 \n"
"xor r2, r0 \n"
"and %[rx], r0 \n"
"xor r0, r2 \n" /* r2 = ...h1h0f1f0d1d0b1b0 */
"shlr2 r0 \n"
"xor r0, r4 \n" /* r4 = ...h3h2f3f2d3d2b3b2 */
"mov r7, r0 \n"
"shll2 r0 \n"
"xor r5, r0 \n"
"and %[rx], r0 \n"
"xor r0, r5 \n" /* r5 = ...g5g4e5e4c5c4a5a4 */
"shlr2 r0 \n"
"xor r0, r7 \n" /* r7 = ...g7g6e7e6c7c6a7a6 */
"mov r8, r0 \n"
"shll2 r0 \n"
"xor r6, r0 \n"
"and %[rx], r0 \n"
"xor r0, r6 \n" /* r6 = ...h5h4f5f4d5d4b5b4 */
"shlr2 r0 \n"
"xor r0, r8 \n" /* r8 = ...h7h6f7f6d7d6b7b6 */
"mov.l .ur_mask1, %[rx] \n" /* bitmask = ...10101010 */
"mov r2, r0 \n" /** Stage 3: 1 bit "comb" **/
"shll r0 \n"
"xor r1, r0 \n"
"and %[rx], r0 \n"
"xor r0, r1 \n" /* r1 = ...h0g0f0e0d0c0b0a0 */
"shlr r0 \n"
"xor r0, r2 \n" /* r2 = ...h1g1f1e1d1c1b1a1 */
"mov r4, r0 \n"
"shll r0 \n"
"xor r3, r0 \n"
"and %[rx], r0 \n"
"xor r0, r3 \n" /* r3 = ...h2g2f2e2d2c2b2a2 */
"shlr r0 \n"
"xor r0, r4 \n" /* r4 = ...h3g3f3e3d3c3b3a3 */
"mov r6, r0 \n"
"shll r0 \n"
"xor r5, r0 \n"
"and %[rx], r0 \n"
"xor r0, r5 \n" /* r5 = ...h4g4f4e4d4c4b4a4 */
"shlr r0 \n"
"xor r0, r6 \n" /* r6 = ...h5g5f5e5d5c5b5a5 */
"mov r8, r0 \n"
"shll r0 \n"
"xor r7, r0 \n"
"and %[rx], r0 \n"
"xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
"shlr r0 \n"
"xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
"mov %[dpth], %[rx] \n" /** shift out unused low bytes **/
"add #-1, %[rx] \n"
"mov #7, r0 \n"
"and r0, %[rx] \n"
"mova .ur_pshift, r0 \n"
"add %[rx], r0 \n"
"add %[rx], r0 \n"
"jmp @r0 \n" /* jump into shift streak */
"nop \n"
".align 2 \n"
".ur_pshift: \n"
"shlr8 r7 \n"
"shlr8 r6 \n"
"shlr8 r5 \n"
"shlr8 r4 \n"
"shlr8 r3 \n"
"shlr8 r2 \n"
"shlr8 r1 \n"
"tst %[mask], %[mask] \n"
"bt .ur_sstart \n" /* short loop if nothing to keep */
"mova .ur_ftable, r0 \n" /* jump into full loop */
"mov.b @(r0, %[rx]), %[rx] \n"
"add %[rx], r0 \n"
"jmp @r0 \n"
"nop \n"
".align 2 \n"
".ur_ftable: \n"
".byte .ur_f1 - .ur_ftable \n"
".byte .ur_f2 - .ur_ftable \n"
".byte .ur_f3 - .ur_ftable \n"
".byte .ur_f4 - .ur_ftable \n"
".byte .ur_f5 - .ur_ftable \n"
".byte .ur_f6 - .ur_ftable \n"
".byte .ur_f7 - .ur_ftable \n"
".byte .ur_f8 - .ur_ftable \n"
".ur_floop: \n" /** full loop (there are bits to keep)**/
".ur_f8: \n"
"mov.b @%[addr], r0 \n" /* load old byte */
"and %[mask], r0 \n" /* mask out replaced bits */
"or r1, r0 \n" /* set new bits */
"mov.b r0, @%[addr] \n" /* store byte */
"add %[psiz], %[addr] \n"
"shlr8 r1 \n" /* shift out used-up byte */
".ur_f7: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r2, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r2 \n"
".ur_f6: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r3, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r3 \n"
".ur_f5: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r4, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r4 \n"
".ur_f4: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r5, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r5 \n"
".ur_f3: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r6, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r6 \n"
".ur_f2: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r7, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r7 \n"
".ur_f1: \n"
"mov.b @%[addr], r0 \n"
"and %[mask], r0 \n"
"or r8, r0 \n"
"mov.b r0, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r8 \n"
"add #-8, %[dpth] \n"
"cmp/pl %[dpth] \n" /* next round if anything left */
"bt .ur_floop \n"
"bra .ur_end \n"
"nop \n"
/* References to C library routines used in the precalc block */
".align 2 \n"
".ashlsi3: \n" /* C library routine: */
".long ___ashlsi3 \n" /* shift r4 left by r5, result in r0 */
".lshrsi3: \n" /* C library routine: */
".long ___lshrsi3 \n" /* shift r4 right by r5, result in r0 */
/* both routines preserve r4, destroy r5 and take ~16 cycles */
/* Bitmasks for the bit block rotation */
".ur_mask4: \n"
".long 0xF0F0F0F0 \n"
".ur_mask2: \n"
".long 0xCCCCCCCC \n"
".ur_mask1: \n"
".long 0xAAAAAAAA \n"
".ur_sstart: \n"
"mova .ur_stable, r0 \n" /* jump into short loop */
"mov.b @(r0, %[rx]), %[rx] \n"
"add %[rx], r0 \n"
"jmp @r0 \n"
"nop \n"
".align 2 \n"
".ur_stable: \n"
".byte .ur_s1 - .ur_stable \n"
".byte .ur_s2 - .ur_stable \n"
".byte .ur_s3 - .ur_stable \n"
".byte .ur_s4 - .ur_stable \n"
".byte .ur_s5 - .ur_stable \n"
".byte .ur_s6 - .ur_stable \n"
".byte .ur_s7 - .ur_stable \n"
".byte .ur_s8 - .ur_stable \n"
".ur_sloop: \n" /** short loop (nothing to keep) **/
".ur_s8: \n"
"mov.b r1, @%[addr] \n" /* store byte */
"add %[psiz], %[addr] \n"
"shlr8 r1 \n" /* shift out used-up byte */
".ur_s7: \n"
"mov.b r2, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r2 \n"
".ur_s6: \n"
"mov.b r3, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r3 \n"
".ur_s5: \n"
"mov.b r4, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r4 \n"
".ur_s4: \n"
"mov.b r5, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r5 \n"
".ur_s3: \n"
"mov.b r6, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r6 \n"
".ur_s2: \n"
"mov.b r7, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r7 \n"
".ur_s1: \n"
"mov.b r8, @%[addr] \n"
"add %[psiz], %[addr] \n"
"shlr8 r8 \n"
"add #-8, %[dpth] \n"
"cmp/pl %[dpth] \n" /* next round if anything left */
"bt .ur_sloop \n"
".ur_end: \n"
: /* outputs */
[addr]"+r"(addr),
[dpth]"+r"(depth),
[rx] "=&r"(trash)
: /* inputs */
[mask]"r"(mask),
[psiz]"r"(_gray_info.plane_size),
[patp]"[rx]"(pat_ptr)
: /* clobbers */
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "macl"
);
}
#elif defined(CPU_COLDFIRE)
asm volatile (
"move.l (%[cbuf]), %%d0 \n"
"move.l (%[bbuf]), %%d1 \n"
"eor.l %%d0, %%d1 \n"
"move.l (4,%[cbuf]), %%d0 \n"
"move.l (4,%[bbuf]), %[chg] \n"
"eor.l %%d0, %[chg] \n"
"or.l %%d1, %[chg] \n"
: /* outputs */
[chg] "=&d"(change)
: /* inputs */
[cbuf]"a"(cbuf),
[bbuf]"a"(bbuf)
: /* clobbers */
"d0", "d1"
);
if (change != 0)
{
unsigned char *addr;
unsigned mask, depth, trash;
pat_ptr = &pat_stack[8];
/* precalculate the bit patterns with random shifts
* for all 8 pixels and put them on an extra "stack" */
asm volatile
(
"moveq.l #8, %%d3 \n" /* loop count */
"clr.l %[mask] \n"
".ur_pre_loop: \n"
"clr.l %%d0 \n"
"move.b (%[cbuf])+, %%d0 \n" /* read current buffer */
"clr.l %%d1 \n"
"move.b (%[bbuf]), %%d1 \n" /* read back buffer */
"move.b %%d0, (%[bbuf])+ \n" /* update back buffer */
"clr.l %%d2 \n" /* preset for skipped pixel */
"cmp.l %%d0, %%d1 \n" /* no change? */
"beq.b .ur_skip \n" /* -> skip */
"move.l (%%d0:l:4, %[bpat]), %%d2 \n" /* d2 = bitpattern[byte]; */
"mulu.w #75, %[rnd] \n" /* multiply by 75 */
"add.l #74, %[rnd] \n" /* add another 74 */
/* Since the lower bits are not very random: */
"move.l %[rnd], %%d1 \n"
"lsr.l #8, %%d1 \n" /* get bits 8..15 (need max. 5) */
"and.l %[rmsk], %%d1 \n" /* mask out unneeded bits */
"cmp.l %[dpth], %%d1 \n" /* random >= depth ? */
"blo.b .ur_ntrim \n"
"sub.l %[dpth], %%d1 \n" /* yes: random -= depth; */
".ur_ntrim: \n"
"move.l %%d2, %%d0 \n" /** rotate pattern **/
"lsl.l %%d1, %%d0 \n"
"sub.l %[dpth], %%d1 \n"
"neg.l %%d1 \n" /* d1 = depth - d1 */
"lsr.l %%d1, %%d2 \n"
"or.l %%d0, %%d2 \n" /* rotated_pattern = d2 | d0 */
"or.l #0x0100, %[mask] \n" /* set mask bit */
".ur_skip: \n"
"lsr.l #1, %[mask] \n" /* shift mask */
"move.l %%d2, -(%[patp]) \n" /* push on pattern stack */
"subq.l #1, %%d3 \n" /* loop 8 times (pixel block) */
"bne.b .ur_pre_loop \n"
: /* outputs */
[cbuf]"+a"(cbuf),
[bbuf]"+a"(bbuf),
[patp]"+a"(pat_ptr),
[rnd] "+d"(_gray_random_buffer),
[mask]"=&d"(mask)
: /* inputs */
[bpat]"a"(_gray_info.bitpattern),
[dpth]"d"(_gray_info.depth),
[rmsk]"d"(_gray_info.randmask)
: /* clobbers */
"d0", "d1", "d2", "d3"
);
addr = dst_row;
mask = ~mask & 0xff;
depth = _gray_info.depth;
/* set the bits for all 8 pixels in all bytes according to the
* precalculated patterns on the pattern stack */
asm volatile
(
"movem.l (%[patp]), %%d1-%%d7/%%a0 \n" /* pop all 8 patterns */
/* move.l %%d5, %[ax] */ /* need %%d5 as workspace, but not yet */
/** Rotate the four 8x8 bit "blocks" within r1..r8 **/
"move.l %%d1, %%d0 \n" /** Stage 1: 4 bit "comb" **/
"lsl.l #4, %%d0 \n"
/* move.l %[ax], %%d5 */ /* already in d5 */
"eor.l %%d5, %%d0 \n"
"and.l #0xF0F0F0F0, %%d0 \n" /* bitmask = ...11110000 */
"eor.l %%d0, %%d5 \n"
"move.l %%d5, %[ax] \n" /* ax = ...h3h2h1h0d3d2d1d0 */
"lsr.l #4, %%d0 \n"
"eor.l %%d0, %%d1 \n" /* d1 = ...h7h6h5h4d7d6d5d4 */
"move.l %%d2, %%d0 \n"
"lsl.l #4, %%d0 \n"
"eor.l %%d6, %%d0 \n"
"and.l #0xF0F0F0F0, %%d0 \n"
"eor.l %%d0, %%d6 \n" /* d6 = ...g3g2g1g0c3c2c1c0 */
"lsr.l #4, %%d0 \n"
"eor.l %%d0, %%d2 \n" /* d2 = ...g7g6g5g4c7c6c5c4 */
"move.l %%d3, %%d0 \n"
"lsl.l #4, %%d0 \n"
"eor.l %%d7, %%d0 \n"
"and.l #0xF0F0F0F0, %%d0 \n"
"eor.l %%d0, %%d7 \n" /* d7 = ...f3f2f1f0b3b2b1b0 */
"lsr.l #4, %%d0 \n"
"eor.l %%d0, %%d3 \n" /* d3 = ...f7f6f5f4f7f6f5f4 */
"move.l %%d4, %%d0 \n"
"lsl.l #4, %%d0 \n"
"move.l %%a0, %%d5 \n"
"eor.l %%d5, %%d0 \n"
"and.l #0xF0F0F0F0, %%d0 \n"
"eor.l %%d0, %%d5 \n" /* (a0 = ...e3e2e1e0a3a2a1a0) */
/* move.l %%d5, %%a0 */ /* but d5 is kept until next usage */
"lsr.l #4, %%d0 \n"
"eor.l %%d0, %%d4 \n" /* d4 = ...e7e6e5e4a7a6a5a4 */
"move.l %%d6, %%d0 \n" /** Stage 2: 2 bit "comb" **/
"lsl.l #2, %%d0 \n"
/* move.l %%a0, %%d5 */ /* still in d5 */
"eor.l %%d5, %%d0 \n"
"and.l #0xCCCCCCCC, %%d0 \n" /* bitmask = ...11001100 */
"eor.l %%d0, %%d5 \n"
"move.l %%d5, %%a0 \n" /* a0 = ...g1g0e1e0c1c0a1a0 */
"lsr.l #2, %%d0 \n"
"eor.l %%d0, %%d6 \n" /* d6 = ...g3g2e3e2c3c2a3a2 */
"move.l %[ax], %%d5 \n"
"move.l %%d5, %%d0 \n"
"lsl.l #2, %%d0 \n"
"eor.l %%d7, %%d0 \n"
"and.l #0xCCCCCCCC, %%d0 \n"
"eor.l %%d0, %%d7 \n" /* r2 = ...h1h0f1f0d1d0b1b0 */
"lsr.l #2, %%d0 \n"
"eor.l %%d0, %%d5 \n" /* (ax = ...h3h2f3f2d3d2b3b2) */
/* move.l %%d5, %[ax] */ /* but d5 is kept until next usage */
"move.l %%d2, %%d0 \n"
"lsl.l #2, %%d0 \n"
"eor.l %%d4, %%d0 \n"
"and.l #0xCCCCCCCC, %%d0 \n"
"eor.l %%d0, %%d4 \n" /* d4 = ...g5g4e5e4c5c4a5a4 */
"lsr.l #2, %%d0 \n"
"eor.l %%d0, %%d2 \n" /* d2 = ...g7g6e7e6c7c6a7a6 */
"move.l %%d1, %%d0 \n"
"lsl.l #2, %%d0 \n"
"eor.l %%d3, %%d0 \n"
"and.l #0xCCCCCCCC, %%d0 \n"
"eor.l %%d0, %%d3 \n" /* d3 = ...h5h4f5f4d5d4b5b4 */
"lsr.l #2, %%d0 \n"
"eor.l %%d0, %%d1 \n" /* d1 = ...h7h6f7f6d7d6b7b6 */
"move.l %%d1, %%d0 \n" /** Stage 3: 1 bit "comb" **/
"lsl.l #1, %%d0 \n"
"eor.l %%d2, %%d0 \n"
"and.l #0xAAAAAAAA, %%d0 \n" /* bitmask = ...10101010 */
"eor.l %%d0, %%d2 \n" /* d2 = ...h6g6f6e6d6c6b6a6 */
"lsr.l #1, %%d0 \n"
"eor.l %%d0, %%d1 \n" /* d1 = ...h7g7f7e7d7c7b7a7 */
"move.l %%d3, %%d0 \n"
"lsl.l #1, %%d0 \n"
"eor.l %%d4, %%d0 \n"
"and.l #0xAAAAAAAA, %%d0 \n"
"eor.l %%d0, %%d4 \n" /* d4 = ...h4g4f4e4d4c4b4a4 */
"lsr.l #1, %%d0 \n"
"eor.l %%d0, %%d3 \n" /* d3 = ...h5g5f5e5d5c5b5a5 */
/* move.l %[ax], %%d5 */ /* still in d5 */
"move.l %%d5, %%d0 \n"
"lsl.l #1, %%d0 \n"
"eor.l %%d6, %%d0 \n"
"and.l #0xAAAAAAAA, %%d0 \n"
"eor.l %%d0, %%d6 \n" /* d6 = ...h2g2f2e2d2c2b2a2 */
"lsr.l #1, %%d0 \n"
"eor.l %%d0, %%d5 \n"
"move.l %%d5, %[ax] \n" /* ax = ...h3g3f3e3d3c3b3a3 */
"move.l %%d7, %%d0 \n"
"lsl.l #1, %%d0 \n"
"move.l %%a0, %%d5 \n"
"eor.l %%d5, %%d0 \n"
"and.l #0xAAAAAAAA, %%d0 \n"
"eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */
/* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */
"lsr.l #1, %%d0 \n"
"eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */
"move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/
"subq.l #1, %%d0 \n"
"and.l #7, %%d0 \n"
"move.l %%d0, %%a0 \n"
"move.l %[ax], %%d0 \n" /* all data in D registers */
"jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */
"lsr.l #8, %%d2 \n"
"lsr.l #8, %%d3 \n"
"lsr.l #8, %%d4 \n"
"lsr.l #8, %%d0 \n"
"lsr.l #8, %%d6 \n"
"lsr.l #8, %%d7 \n"
"lsr.l #8, %%d5 \n"
"move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */
"move.l %%a0, %%d0 \n" /* keep the value for later */
"move.l %%d5, %%a0 \n" /* ..into their A registers */
"tst.l %[mask] \n"
"jeq .ur_sstart \n" /* short loop if nothing to keep */
"move.l %[mask], %%d5 \n" /* need mask in data reg. */
"move.l %%d1, %[mask] \n" /* free d1 as working reg. */
"jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */
"bra.s .ur_f1 \n"
"bra.s .ur_f2 \n"
"bra.s .ur_f3 \n"
"bra.s .ur_f4 \n"
"bra.s .ur_f5 \n"
"bra.s .ur_f6 \n"
"bra.s .ur_f7 \n"
/* bra.s .ur_f8 */ /* identical with target */
".ur_floop: \n" /** full loop (there are bits to keep)**/
".ur_f8: \n"
"move.b (%[addr]), %%d0 \n" /* load old byte */
"and.l %%d5, %%d0 \n" /* mask out replaced bits */
"move.l %%a0, %%d1 \n"
"or.l %%d1, %%d0 \n" /* set new bits */
"move.b %%d0, (%[addr]) \n" /* store byte */
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d1 \n" /* shift out used-up byte */
"move.l %%d1, %%a0 \n"
".ur_f7: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"or.l %%d7, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d7 \n"
".ur_f6: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"or.l %%d6, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d6 \n"
".ur_f5: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"move.l %[ax], %%d1 \n"
"or.l %%d1, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d1 \n"
"move.l %%d1, %[ax] \n"
".ur_f4: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"or.l %%d4, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d4 \n"
".ur_f3: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"or.l %%d3, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d3 \n"
".ur_f2: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"or.l %%d2, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d2 \n"
".ur_f1: \n"
"move.b (%[addr]), %%d0 \n"
"and.l %%d5, %%d0 \n"
"move.l %[mask], %%d1 \n"
"or.l %%d1, %%d0 \n"
"move.b %%d0, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d1 \n"
"move.l %%d1, %[mask] \n"
"subq.l #8, %[dpth] \n"
"tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
"jgt .ur_floop \n" /* next round if anything left */
"jra .ur_end \n"
".ur_sstart: \n"
"jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */
"bra.s .ur_s1 \n"
"bra.s .ur_s2 \n"
"bra.s .ur_s3 \n"
"bra.s .ur_s4 \n"
"bra.s .ur_s5 \n"
"bra.s .ur_s6 \n"
"bra.s .ur_s7 \n"
/* bra.s .ur_s8 */ /* identical with target */
".ur_sloop: \n" /** short loop (nothing to keep) **/
".ur_s8: \n"
"move.l %%a0, %%d5 \n"
"move.b %%d5, (%[addr]) \n" /* store byte */
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d5 \n" /* shift out used-up byte */
"move.l %%d5, %%a0 \n"
".ur_s7: \n"
"move.b %%d7, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d7 \n"
".ur_s6: \n"
"move.b %%d6, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d6 \n"
".ur_s5: \n"
"move.l %[ax], %%d5 \n"
"move.b %%d5, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d5 \n"
"move.l %%d5, %[ax] \n"
".ur_s4: \n"
"move.b %%d4, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d4 \n"
".ur_s3: \n"
"move.b %%d3, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d3 \n"
".ur_s2: \n"
"move.b %%d2, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d2 \n"
".ur_s1: \n"
"move.b %%d1, (%[addr]) \n"
"add.l %[psiz], %[addr] \n"
"lsr.l #8, %%d1 \n"
"subq.l #8, %[dpth] \n"
"tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
"jgt .ur_sloop \n" /* next round if anything left */
".ur_end: \n"
: /* outputs */
[addr]"+a"(addr),
[dpth]"+a"(depth),
[mask]"+a"(mask),
[ax] "=&a"(trash)
: /* inputs */
[psiz]"a"(_gray_info.plane_size),
[patp]"[ax]"(pat_ptr)
: /* clobbers */
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a0"
);
}
#else /* C version, for reference */
#warning C version of gray_update_rect() used
(void)pat_ptr;
/* check whether anything changed in the 8-pixel block */
change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);
if (change != 0)
{
unsigned char *addr, *end;
unsigned mask = 0;
unsigned test = 1 << ((-_gray_info.depth) & 7);
int i;
/* precalculate the bit patterns with random shifts
* for all 8 pixels and put them on an extra "stack" */
for (i = 0; i < 8; i++)
{
unsigned pat = 0;
unsigned char cur = *cbuf++;
unsigned char back = *bbuf;
*bbuf++ = cur;
if (cur != back)
{
int shift;
pat = _gray_info.bitpattern[cur];
/* shift the pattern by a pseudo-random amount (simple & fast LCG) */
_gray_random_buffer = 75 * _gray_random_buffer + 74;
shift = (_gray_random_buffer >> 8) & _gray_info.randmask;
if (shift >= _gray_info.depth)
shift -= _gray_info.depth;
pat = (pat << shift) | (pat >> (_gray_info.depth - shift));
mask |= 0x100;
}
mask >>= 1;
pat_stack[i] = pat;
}
addr = dst_row;
end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
/* set the bits for all 8 pixels in all bytes according to the
* precalculated patterns on the pattern stack */
mask = (~mask & 0xff);
if (mask == 0)
{
do
{
unsigned data = 0;
for (i = 7; i >= 0; i--)
data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);
*addr = data;
addr += _gray_info.plane_size;
test <<= 1;
}
while (addr < end);
}
else
{
do
{
unsigned data = 0;
for (i = 7; i >= 0; i--)
data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);
*addr = (*addr & mask) | data;
addr += _gray_info.plane_size;
test <<= 1;
}
while (addr < end);
}
}
#endif /* CONFIG_CPU */
srcofs_row += _gray_info.height;
dst_row++;
}
while (dst_row < dst_end);
srcofs += 8;
dst += _gray_info.width;
}
}
#endif /* LCD_PIXELFORMAT */
#endif /* !SIMULATOR */
/* Update the whole greyscale overlay */
void gray_update(void)
{
gray_update_rect(0, 0, _gray_info.width, _gray_info.height);
}
/* Do an lcd_update() to show changes done by rb->lcd_xxx() functions
(in areas of the screen not covered by the greyscale overlay). */
void gray_deferred_lcd_update(void)
{
if (_gray_info.flags & _GRAY_RUNNING)
{
#ifdef SIMULATOR
_deferred_update();
#else
_gray_info.flags |= _GRAY_DEFERRED_UPDATE;
#endif
}
else
_gray_rb->lcd_update();
}
/*** Screenshot ***/
#define BMP_FIXEDCOLORS (1 << LCD_DEPTH)
#define BMP_VARCOLORS 33
#define BMP_NUMCOLORS (BMP_FIXEDCOLORS + BMP_VARCOLORS)
#define BMP_BPP 8
#define BMP_LINESIZE ((LCD_WIDTH + 3) & ~3) /* BMP rows are padded to multiples of 4 bytes */
#define BMP_HEADERSIZE (54 + 4 * BMP_NUMCOLORS)
#define BMP_DATASIZE (BMP_LINESIZE * LCD_HEIGHT)
#define BMP_TOTALSIZE (BMP_HEADERSIZE + BMP_DATASIZE)
#define LE16_CONST(x) (x)&0xff, ((x)>>8)&0xff
#define LE32_CONST(x) (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
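/* e.g. LE32_CONST(0x00012345) expands to 0x45, 0x23, 0x01, 0x00 - the
   little-endian byte order that the BMP format requires */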
static const unsigned char bmpheader[] =
{
0x42, 0x4d, /* 'BM' */
LE32_CONST(BMP_TOTALSIZE), /* Total file size */
0x00, 0x00, 0x00, 0x00, /* Reserved */
LE32_CONST(BMP_HEADERSIZE), /* Offset to start of pixel data */
0x28, 0x00, 0x00, 0x00, /* Size of (2nd) header */
LE32_CONST(LCD_WIDTH), /* Width in pixels */
LE32_CONST(LCD_HEIGHT), /* Height in pixels */
0x01, 0x00, /* Number of planes (always 1) */
LE16_CONST(BMP_BPP), /* Bits per pixel 1/4/8/16/24 */
0x00, 0x00, 0x00, 0x00, /* Compression mode, 0 = none */
LE32_CONST(BMP_DATASIZE), /* Size of bitmap data */
0xc4, 0x0e, 0x00, 0x00, /* Horizontal resolution (pixels/meter) */
0xc4, 0x0e, 0x00, 0x00, /* Vertical resolution (pixels/meter) */
LE32_CONST(BMP_NUMCOLORS), /* Number of used colours */
LE32_CONST(BMP_NUMCOLORS), /* Number of important colours */
/* Fixed colours */
#if LCD_DEPTH == 1
0x90, 0xee, 0x90, 0x00, /* Colour #0 */
0x00, 0x00, 0x00, 0x00 /* Colour #1 */
#elif LCD_DEPTH == 2
0xe6, 0xd8, 0xad, 0x00, /* Colour #0 */
0x99, 0x90, 0x73, 0x00, /* Colour #1 */
0x4c, 0x48, 0x39, 0x00, /* Colour #2 */
0x00, 0x00, 0x00, 0x00 /* Colour #3 */
#endif
};
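/* Component values of fixed colour #0 above (the brightest shade); note
   that BMP palette entries are stored in B,G,R,reserved order */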
#if LCD_DEPTH == 1
#define BMP_RED 0x90
#define BMP_GREEN 0xee
#define BMP_BLUE 0x90
#elif LCD_DEPTH == 2
#define BMP_RED 0xad
#define BMP_GREEN 0xd8
#define BMP_BLUE 0xe6
#endif
/* Hook function for core screen_dump() to save the current display
content (b&w and greyscale overlay) to an 8-bit BMP file. */
static void gray_screendump_hook(int fd)
{
int i;
int x, y;
int gx, gy;
#if (LCD_DEPTH == 1) || !defined(SIMULATOR)
int mask;
#endif
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
unsigned data;
#else
int by;
#if LCD_DEPTH == 2
int shift;
#endif
#endif
unsigned char *clut_entry;
unsigned char *lcdptr;
unsigned char linebuf[MAX(4*BMP_VARCOLORS,BMP_LINESIZE)];
_gray_rb->write(fd, bmpheader, sizeof(bmpheader)); /* write header */
/* Build the CLUT for the variable greyscale colours. The loop writes
   'depth' entries from lightest to second-darkest; the all-planes-set
   (black) entry and any unused slots keep the 0,0,0 preset from the
   memset below. */
_gray_rb->memset(linebuf, 0, 4*BMP_VARCOLORS);
clut_entry = linebuf;
for (i = _gray_info.depth; i > 0; i--)
{
*clut_entry++ = MULU16(BMP_BLUE, i) / _gray_info.depth;
*clut_entry++ = MULU16(BMP_GREEN, i) / _gray_info.depth;
*clut_entry++ = MULU16(BMP_RED, i) / _gray_info.depth;
clut_entry++; /* skip the reserved byte */
}
_gray_rb->write(fd, linebuf, 4*BMP_VARCOLORS);
/* BMP image goes bottom -> top */
for (y = LCD_HEIGHT - 1; y >= 0; y--)
{
_gray_rb->memset(linebuf, 0, BMP_LINESIZE);
gy = y - _gray_info.y;
#if LCD_PIXELFORMAT == HORIZONTAL_PACKING
#if LCD_DEPTH == 2
lcdptr = _gray_rb->lcd_framebuffer + MULU16(LCD_FBWIDTH, y);
if ((unsigned) gy < (unsigned) _gray_info.height)
{
/* line contains greyscale (and maybe b&w) graphics */
#ifndef SIMULATOR
unsigned char *grayptr = _gray_info.plane_data
+ MULU16(_gray_info.bwidth, gy);
#endif
for (x = 0; x < LCD_WIDTH; x += 4)
{
gx = x - _gray_info.x;
if ((unsigned)gx < (unsigned)_gray_info.width)
{
#ifdef SIMULATOR
data = MULU16(gy, _gray_info.width) + gx;
for (i = 0; i < 4; i++)
linebuf[x + i] = BMP_FIXEDCOLORS + _gray_info.depth
- _gray_info.cur_buffer[data + i];
#else
mask = 0x80 >> (gx & 7);
for (i = 0; i < 4; i++)
{
int j;
int idx = BMP_FIXEDCOLORS;
unsigned char *grayptr2 = grayptr + (gx >> 3);
for (j = _gray_info.depth; j > 0; j--)
{
if (*grayptr2 & mask)
idx++;
grayptr2 += _gray_info.plane_size;
}
linebuf[x + i] = idx;
mask >>= 1;
}
#endif
}
else
{
data = *lcdptr;
linebuf[x] = (data >> 6) & 3;
linebuf[x + 1] = (data >> 4) & 3;
linebuf[x + 2] = (data >> 2) & 3;
linebuf[x + 3] = data & 3;
}
lcdptr++;
}
}
else
{
/* line contains only b&w graphics */
for (x = 0; x < LCD_WIDTH; x += 4)
{
data = *lcdptr++;
linebuf[x] = (data >> 6) & 3;
linebuf[x + 1] = (data >> 4) & 3;
linebuf[x + 2] = (data >> 2) & 3;
linebuf[x + 3] = data & 3;
}
}
#endif /* LCD_DEPTH */
#else /* LCD_PIXELFORMAT == VERTICAL_PACKING */
#if LCD_DEPTH == 1
mask = 1 << (y & 7);
by = y >> 3;
lcdptr = _gray_rb->lcd_framebuffer + MULU16(LCD_WIDTH, by);
if ((unsigned) gy < (unsigned) _gray_info.height)
{
/* line contains greyscale (and maybe b&w) graphics */
#ifndef SIMULATOR
unsigned char *grayptr = _gray_info.plane_data
+ MULU16(_gray_info.width, gy >> 3);
#endif
for (x = 0; x < LCD_WIDTH; x++)
{
gx = x - _gray_info.x;
if ((unsigned)gx < (unsigned)_gray_info.width)
{
#ifdef SIMULATOR
linebuf[x] = BMP_FIXEDCOLORS + _gray_info.depth
- _gray_info.cur_buffer[MULU16(gx, _gray_info.height) + gy];
#else
int idx = BMP_FIXEDCOLORS;
unsigned char *grayptr2 = grayptr + gx;
for (i = _gray_info.depth; i > 0; i--)
{
if (*grayptr2 & mask)
idx++;
grayptr2 += _gray_info.plane_size;
}
linebuf[x] = idx;
#endif
}
else
{
linebuf[x] = (*lcdptr & mask) ? 1 : 0;
}
lcdptr++;
}
}
else
{
/* line contains only b&w graphics */
for (x = 0; x < LCD_WIDTH; x++)
linebuf[x] = (*lcdptr++ & mask) ? 1 : 0;
}
#elif LCD_DEPTH == 2
shift = 2 * (y & 3);
by = y >> 2;
lcdptr = _gray_rb->lcd_framebuffer + MULU16(LCD_WIDTH, by);
if ((unsigned)gy < (unsigned)_gray_info.height)
{
/* line contains greyscale (and maybe b&w) graphics */
#ifndef SIMULATOR
unsigned char *grayptr = _gray_info.plane_data
+ MULU16(_gray_info.width, gy >> 3);
mask = 1 << (gy & 7);
#endif
for (x = 0; x < LCD_WIDTH; x++)
{
gx = x - _gray_info.x;
if ((unsigned)gx < (unsigned)_gray_info.width)
{
#ifdef SIMULATOR
linebuf[x] = BMP_FIXEDCOLORS + _gray_info.depth
- _gray_info.cur_buffer[MULU16(gx, _gray_info.height) + gy];
#else
int idx = BMP_FIXEDCOLORS;
unsigned char *grayptr2 = grayptr + gx;
for (i = _gray_info.depth; i > 0; i--)
{
if (*grayptr2 & mask)
idx++;
grayptr2 += _gray_info.plane_size;
}
linebuf[x] = idx;
#endif
}
else
{
linebuf[x] = (*lcdptr >> shift) & 3;
}
lcdptr++;
}
}
else
{
/* line contains only b&w graphics */
for (x = 0; x < LCD_WIDTH; x++)
linebuf[x] = (*lcdptr++ >> shift) & 3;
}
#endif /* LCD_DEPTH */
#endif /* LCD_PIXELFORMAT */
_gray_rb->write(fd, linebuf, BMP_LINESIZE);
}
}
#endif /* HAVE_LCD_BITMAP */