/* 99f9550881
 * git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15111 a1c6a512-1295-4272-9138-f99709370657
 * 414 lines, 13 KiB, ArmAsm
 */
/***************************************************************************
|
|
* __________ __ ___.
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
* \/ \/ \/ \/ \/
|
|
* $Id$
|
|
*
|
|
* Copyright (C) 2006 by Michael Sevakis
|
|
* Based on lcd_write_data for H300 in lcd.S
|
|
*
|
|
* All files in this archive are subject to the GNU General Public License.
|
|
* See the file COPYING in the source tree root for full license agreement.
|
|
*
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
* KIND, either express or implied.
|
|
*
|
|
****************************************************************************/
|
|
|
|
#include "config.h"
|
|
#include "cpu.h"
|
|
|
|
.section .icode,"ax",@progbits
|
|
|
|
/* begin lcd_write_yuv420_lines
 *
 * Converts YUV 4:2:0 data to RGB666 and writes it to the LCD data port,
 * two 16-bit words per pixel (upper half of the packed value first).
 *
 * Stack arguments (read with one movem.l after the register save):
 *   Y data pointer, C data pointer, C width
 *
 * Data is consumed in 2x2 pixel blocks that share one U/V pair:
 *   - luma is read alternately from (Y pointer + 2 * C width) and from
 *     (Y pointer), the latter with post-increment
 *   - U is read from (C pointer) with post-increment, V from
 *     (C pointer + C width)
 * The loop is bottom-tested and ends once the Y pointer reaches
 * (initial Y pointer + 2 * C width), so at least one 2x2 block is always
 * processed.
 *
 * See http://en.wikipedia.org/wiki/YCbCr
 * ITU-R BT.601 (formerly CCIR 601):
 * |Y'|   | 0.299000  0.587000  0.114000| |R|
 * |Pb| = |-0.168736 -0.331264  0.500000| |G| or 0.564334*(B - Y')
 * |Pr|   | 0.500000 -0.418688  0.081312| |B| or 0.713267*(R - Y')
 * Scaled, normalized and rounded:
 * |Y'|   | 65  129   25| |R| +  16 : 16->235
 * |Cb| = |-38  -74  112| |G| + 128 : 16->240
 * |Cr|   |112  -94  -18| |B| + 128 : 16->240
 *
 * The inverse:
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Scaled, normalized, rounded and tweaked to yield RGB 666:
 * |R|   |19611723         0  26881894| |Y' -  16| >> 26
 * |G| = |19611723  -6406711 -13692816| |Cb - 128| >> 26
 * |B|   |19611723  33976259         0| |Cr - 128| >> 26
 *
 * The code subtracts 126 (not 16) from Y', which keeps the accumulated
 * values signed; the sign bits of R, G and B are flipped afterwards with
 * the mask in %d7 to obtain unsigned components.
 *
 * Needs EMAC set to saturated, signed integer mode.
 * NOTE(review): %acc0-%acc2 are accumulated into without being cleared
 * first (they are only cleared by the movclr.l at the end of each block);
 * presumably the caller must enter with them zeroed - confirm.
 *
 * register usage:
 *   %a0 - LCD data port
 *   %a1 - Y pointer
 *   %a2 - C pointer
 *   %a3 - C width
 *   %a4 - Y end address
 *   %a5 - Y factor
 *   %a6 - BU factor
 *   %d0 - scratch
 *   %d1 -     R, previous Y \ alternating
 *   %d2 - U / R, previous Y /
 *   %d3 - V / G
 *   %d4 - B / output pixel
 *   %d5 - GU factor
 *   %d6 - GV factor
 *   %d7 - RGB signed -> unsigned conversion mask
 */
    .align      2
    .global     lcd_write_yuv420_lines
    .type       lcd_write_yuv420_lines, @function

lcd_write_yuv420_lines:
    lea.l       (-44, %sp), %sp         /* free up some registers */
    movem.l     %d2-%d7/%a2-%a6, (%sp)

    lea.l       0xf0008002, %a0         /* LCD data port */
    movem.l     (44+4, %sp), %a1-%a3    /* Y data, C data, C width */
    lea.l       (%a1, %a3*2), %a4       /* Y end address */

    move.l      #19611723, %a5          /* y factor */
    move.l      #33976259, %a6          /* bu factor */
    move.l      #-6406711, %d5          /* gu factor */
    move.l      #-13692816, %d6         /* gv factor */
    move.l      #0x01040820, %d7        /* bitmask for signed->unsigned conversion
                                         * of R, G and B within RGGB6666 at once */

    /* chroma for first 2x2 block */
    clr.l       %d3                     /* load v component */
    move.b      (%a2, %a3), %d3
    clr.l       %d2                     /* load u component */
    move.b      (%a2)+, %d2
    moveq.l     #-128, %d0
    add.l       %d0, %d2
    add.l       %d0, %d3

    mac.l       %a6, %d2, %acc0         /* bu */
    mac.l       %d5, %d2, %acc1         /* gu */
    mac.l       %d6, %d3, %acc1         /* gv */
    move.l      #26881894, %d0          /* rv factor */
    mac.l       %d0, %d3, %acc2         /* rv */

    /* luma for very first pixel (top left) */
    clr.l       %d1
    move.b      (%a1, %a3*2), %d1
    moveq.l     #-126, %d0
    add.l       %d1, %d0                /* y' (-0.5 ... +0.5) */
    mac.l       %a5, %d0, %acc0
    mac.l       %a5, %d0, %acc1
    mac.l       %a5, %d0, %acc2

    /* no previous pixel yet, so skip the delayed 2nd-word write */
    bra.b       .yuv_line_entry

.yuv_line_loop:
    /* chroma for 2x2 pixel block */
    clr.l       %d3                     /* load v component */
    move.b      (%a2, %a3), %d3
    clr.l       %d2                     /* load u component */
    move.b      (%a2)+, %d2
    moveq.l     #-128, %d0
    add.l       %d0, %d2
    add.l       %d0, %d3

    mac.l       %a6, %d2, %acc0         /* bu */
    mac.l       %d5, %d2, %acc1         /* gu */
    mac.l       %d6, %d3, %acc1         /* gv */
    move.l      #26881894, %d0          /* rv factor */
    mac.l       %d0, %d3, %acc2         /* rv */

    /* luma for first pixel (top left) */
    clr.l       %d1
    move.b      (%a1, %a3*2), %d1
    moveq.l     #-126, %d0
    add.l       %d1, %d0                /* y' (-0.5 ... +0.5) */
    mac.l       %a5, %d0, %acc0
    mac.l       %a5, %d0, %acc1
    mac.l       %a5, %d0, %acc2

    move.w      %d4, (%a0)
    /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */

    /* convert to RGB666, pack and output */
.yuv_line_entry:
    moveq.l     #26, %d0
    move.l      %acc0, %d4
    move.l      %acc1, %d3
    move.l      %acc2, %d2
    lsr.l       %d0, %d4
    lsr.l       %d0, %d3
    lsr.l       %d0, %d2

    lsl.l       #6, %d2
    or.l        %d3, %d2                /* |00000000|00000000|0000Rrrr|rrGggggg| */
    lsl.l       #7, %d2
    or.l        %d2, %d3                /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
    lsl.l       #6, %d3
    or.l        %d3, %d4                /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
    eor.l       %d7, %d4                /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
    swap        %d4
    move.w      %d4, (%a0)              /* 1st word: upper half of the pixel */
    swap        %d4                     /* lower half back in place for later */

    /* luma for second pixel (bottom left) as delta from the first */
    clr.l       %d2
    move.b      (%a1)+, %d2
    move.l      %d2, %d0
    sub.l       %d1, %d0
    mac.l       %a5, %d0, %acc0
    mac.l       %a5, %d0, %acc1
    mac.l       %a5, %d0, %acc2

    move.w      %d4, (%a0)
    /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */

    /* convert to RGB666, pack and output */
    moveq.l     #26, %d0
    move.l      %acc0, %d4
    move.l      %acc1, %d3
    move.l      %acc2, %d1
    lsr.l       %d0, %d4
    lsr.l       %d0, %d3
    lsr.l       %d0, %d1

    lsl.l       #6, %d1
    or.l        %d3, %d1                /* |00000000|00000000|0000Rrrr|rrGggggg| */
    lsl.l       #7, %d1
    or.l        %d1, %d3                /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
    lsl.l       #6, %d3
    or.l        %d3, %d4                /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
    eor.l       %d7, %d4                /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
    swap        %d4
    move.w      %d4, (%a0)
    swap        %d4

    /* luma for third pixel (top right) as delta from the second */
    clr.l       %d1
    move.b      (%a1, %a3*2), %d1
    move.l      %d1, %d0
    sub.l       %d2, %d0
    mac.l       %a5, %d0, %acc0
    mac.l       %a5, %d0, %acc1
    mac.l       %a5, %d0, %acc2

    move.w      %d4, (%a0)
    /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */

    /* convert to RGB666, pack and output */
    moveq.l     #26, %d0
    move.l      %acc0, %d4
    move.l      %acc1, %d3
    move.l      %acc2, %d2
    lsr.l       %d0, %d4
    lsr.l       %d0, %d3
    lsr.l       %d0, %d2

    lsl.l       #6, %d2
    or.l        %d3, %d2                /* |00000000|00000000|0000Rrrr|rrGggggg| */
    lsl.l       #7, %d2
    or.l        %d2, %d3                /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
    lsl.l       #6, %d3
    or.l        %d3, %d4                /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
    eor.l       %d7, %d4                /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
    swap        %d4
    move.w      %d4, (%a0)
    swap        %d4

    /* luma for fourth pixel (bottom right) as delta from the third */
    clr.l       %d2
    move.b      (%a1)+, %d2
    move.l      %d2, %d0
    sub.l       %d1, %d0
    mac.l       %a5, %d0, %acc0
    mac.l       %a5, %d0, %acc1
    mac.l       %a5, %d0, %acc2

    move.w      %d4, (%a0)
    /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */

    /* convert to RGB666, pack and output;
     * movclr.l also clears the accumulators for the next 2x2 block */
    moveq.l     #26, %d0
    movclr.l    %acc0, %d4
    movclr.l    %acc1, %d3
    movclr.l    %acc2, %d1
    lsr.l       %d0, %d4
    lsr.l       %d0, %d3
    lsr.l       %d0, %d1

    lsl.l       #6, %d1
    or.l        %d3, %d1                /* |00000000|00000000|0000Rrrr|rrGggggg| */
    lsl.l       #7, %d1
    or.l        %d1, %d3                /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
    lsl.l       #6, %d3
    or.l        %d3, %d4                /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
    eor.l       %d7, %d4                /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
    swap        %d4
    move.w      %d4, (%a0)
    swap        %d4

    cmp.l       %a1, %a4                /* run %a1 up to end of line */
    bhi.w       .yuv_line_loop

    move.w      %d4, (%a0)              /* write (very) last 2nd word */

    movem.l     (%sp), %d2-%d7/%a2-%a6
    lea.l       (44, %sp), %sp          /* restore registers */
    rts
.yuv_end:
    /* the end label is .yuv_end (leading dot); the size expression must
     * use that same symbol */
    .size       lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines
|
|
|
|
|
|
/* begin lcd_write_data
 *
 * Sends 16-bit pixels (|rrrrrggg|gggbbbbb| per the bit diagrams below) to
 * the LCD data port. Each pixel is emitted as two 16-bit writes: first the
 * value shifted right by 7, then the value shifted left by 1. Only the bits
 * marked with ^ in the diagrams are meaningful; the remaining bits written
 * are leftovers from neighbouring data.
 *
 * Stack arguments:
 *   (4,%sp) - data pointer
 *   (8,%sp) - length in words
 * A length of zero returns immediately without touching the LCD.
 *
 * Transfer stages:
 *   1. one initial word if the pointer is not longword aligned
 *   2. leading longwords up to the next 16-byte ("line") bound
 *   3. 16-byte bursts read with movem.l
 *   4. trailing longwords
 *   5. one final word if an odd word remains
 * Stages 2 and 3 are skipped when the data does not reach the second line
 * bound.
 *
 * register usage:
 *   %a0 - data pointer
 *   %a1 - LCD data port
 *   %a2 - end address - 14 (line loop bound)
 *   %d0 - end address (end address - 2 from stage 4 on)
 *   %d1 - scratch / line bounds / burst data
 *   %d2-%d4 - pixel data
 *   %d5 - scratch
 */
    .align      2
    .global     lcd_write_data
    .type       lcd_write_data,@function
lcd_write_data:
    move.l      (4,%sp),%a0             /* data pointer */
    move.l      (8,%sp),%d0             /* length in words (sets Z if zero) */
    beq.w       .wd_return              /* nothing to send: leave before the
                                         * unconditional initial word copy */
    add.l       %d0,%d0                 /* words -> bytes */
    add.l       %a0,%d0                 /* -> end address */
    lea.l       0xf0008002,%a1          /* LCD data port */

    lea.l       (-20,%sp),%sp           /* free up some registers */
    movem.l     %d2-%d5/%a2,(%sp)

    move.l      %a0,%d1
    btst.l      #1,%d1                  /* already longword aligned? */
    beq.b       .wd_wordl_end           /* yes: skip initial word copy */
    /* transfer initial word */
    move.w      (%a0)+,%d2              /* |????????|????????|rrrrrggg|gggbbbbb| */
    move.l      %d2,%d1
    lsr.l       #7,%d1                  /* |0000000?|????????|???????r|rrrrgggg| */
    move.w      %d1,(%a1)               /*                           ^ ^^^^^^^^  */
    lsl.l       #1,%d2                  /* |????????|???????r|rrrrgggg|ggbbbbb0| */
    move.w      %d2,(%a1)               /*                           ^ ^^^^^^^^  */

.wd_wordl_end:                          /* now longword aligned */
    moveq.l     #28,%d1
    add.l       %a0,%d1
    and.l       #0xFFFFFFF0,%d1         /* %d1 = second line bound */
    cmp.l       %d1,%d0                 /* at least one full line to send? */
    blo.w       .wd_long2_start         /* no: skip to trailing longword handling */

    subq.l      #8,%d1
    subq.l      #8,%d1                  /* %d1 = first line bound */

    cmp.l       %a0,%d1                 /* any leading longwords? */
    bls.b       .wd_long1_end           /* no: skip leading long loop */

.wd_long1_loop:
    move.l      (%a0)+,%d2              /* read longword */
    swap        %d2                     /* |rrrrrggg|gggbbbbb|RRRRRGGG|GGGBBBBB| */
    move.l      %d2,%d5
    lsr.l       #7,%d5                  /* |0000000r|rrrrgggg|ggbbbbbR|RRRRGGGG| */
    move.w      %d5,(%a1)               /*                           ^ ^^^^^^^^  */
    lsl.l       #1,%d2                  /* |rrrrgggg|ggbbbbbR|RRRRGGGG|GGBBBBB0| */
    move.w      %d2,(%a1)               /*                           ^ ^^^^^^^^  */
    swap        %d5                     /* |ggbbbbbR|RRRRGGGG|0000000r|rrrrgggg| */
    move.w      %d5,(%a1)               /*                           ^ ^^^^^^^^  */
    swap        %d2                     /* |RRRRGGGG|GGBBBBB0|rrrrgggg|ggbbbbbR| */
    move.w      %d2,(%a1)               /*                           ^ ^^^^^^^^  */
    cmp.l       %a0,%d1                 /* run %a0 up to first line bound */
    bhi.b       .wd_long1_loop

.wd_long1_end:
    move.l      %d0,%a2
    lea.l       (-14,%a2),%a2           /* loop while at least 16 bytes remain */

.wd_line_loop:
    movem.l     (%a0),%d1-%d4           /* burst-read eight words */
    lea.l       (16,%a0),%a0            /* increment address */

    /* transfer four pairs of longs to display */
    swap        %d1
    move.l      %d1,%d5
    lsr.l       #7,%d5
    move.w      %d5,(%a1)
    lsl.l       #1,%d1
    move.w      %d1,(%a1)
    swap        %d5
    move.w      %d5,(%a1)
    swap        %d1
    move.w      %d1,(%a1)

    swap        %d2
    move.l      %d2,%d5
    lsr.l       #7,%d5
    move.w      %d5,(%a1)
    lsl.l       #1,%d2
    move.w      %d2,(%a1)
    swap        %d5
    move.w      %d5,(%a1)
    swap        %d2
    move.w      %d2,(%a1)

    swap        %d3
    move.l      %d3,%d5
    lsr.l       #7,%d5
    move.w      %d5,(%a1)
    lsl.l       #1,%d3
    move.w      %d3,(%a1)
    swap        %d5
    move.w      %d5,(%a1)
    swap        %d3
    move.w      %d3,(%a1)

    swap        %d4
    move.l      %d4,%d5
    lsr.l       #7,%d5
    move.w      %d5,(%a1)
    lsl.l       #1,%d4
    move.w      %d4,(%a1)
    swap        %d5
    move.w      %d5,(%a1)
    swap        %d4
    move.w      %d4,(%a1)

    cmp.l       %a0,%a2                 /* run %a0 up to last line bound */
    bhi.w       .wd_line_loop

.wd_long2_start:
    subq.l      #2,%d0                  /* account for handling 2 words per loop */
    cmp.l       %a0,%d0                 /* any trailing longwords? */
    bls.b       .wd_long2_end           /* no: skip trailing longword loop */

.wd_long2_loop:
    move.l      (%a0)+,%d2              /* read longword */
    swap        %d2
    move.l      %d2,%d5
    lsr.l       #7,%d5
    move.w      %d5,(%a1)
    lsl.l       #1,%d2
    move.w      %d2,(%a1)
    swap        %d5
    move.w      %d5,(%a1)
    swap        %d2
    move.w      %d2,(%a1)
    cmp.l       %a0,%d0                 /* run %a0 up to last long bound */
    bhi.b       .wd_long2_loop

.wd_long2_end:
    /* flags still come from the last cmp.l %a0,%d0: equal means exactly
     * one word is left, lower means the data is exhausted */
    blo.b       .wd_word2_end           /* no final word: skip */

    move.w      (%a0)+,%d2              /* transfer final word */
    move.l      %d2,%d1
    lsr.l       #7,%d1
    move.w      %d1,(%a1)
    lsl.l       #1,%d2
    move.w      %d2,(%a1)

.wd_word2_end:
    movem.l     (%sp),%d2-%d5/%a2
    lea.l       (20,%sp),%sp            /* restore registers */
.wd_return:                             /* zero-length exit: nothing was saved */
    rts

.wd_end:
    .size       lcd_write_data,.wd_end-lcd_write_data
/* end lcd_write_data */
|