Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9747 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Karl Kurbjun 2006-04-20 19:39:56 +00:00
parent 9e9921b087
commit 29ab31e8f1
4 changed files with 106 additions and 49 deletions

View file

@ -48,11 +48,11 @@
// mixing buffer, and the samplerate of the raw data.
// Needed for calling the actual sound output.
#define SAMPLECOUNT 512
#define SAMPLECOUNT 512
#define NUM_CHANNELS 16
#define NUM_CHANNELS 16
// It is 2 for 16bit, and 2 for two channels.
#define BUFMUL 4
#define BUFMUL 2
#define MIXBUFFERSIZE (SAMPLECOUNT*BUFMUL)
#if (CONFIG_KEYPAD == IPOD_3G_PAD) || (CONFIG_KEYPAD == IPOD_4G_PAD)
@ -66,7 +66,7 @@
// Basically, samples from all active internal channels
// are modified and added, and stored in the buffer
// that is submitted to the audio device.
signed short *mixbuffer=NULL;
signed short mixbuffer[MIXBUFFERSIZE] IBSS_ATTR;
typedef struct {
// SFX id of the playing sound effect.
@ -91,7 +91,7 @@ typedef struct {
int *rightvol_lookup;
} channel_info_t;
channel_info_t channelinfo[NUM_CHANNELS];
channel_info_t channelinfo[NUM_CHANNELS] IBSS_ATTR;
int *vol_lookup; // Volume lookups.
@ -355,13 +355,6 @@ int I_SoundIsPlaying(int handle)
// This function currently supports only 16bit.
//
bool swap=0;
bool lastswap=1;
// Pointers in global mixbuffer, left, right, end.
signed short* leftout;
signed short* rightout;
signed short* leftend;
void I_UpdateSound( void )
{
// Mix current sound data.
@ -370,25 +363,26 @@ void I_UpdateSound( void )
register int dl;
register int dr;
// Pointers in global mixbuffer, left, right, end.
signed short* leftout;
signed short* rightout;
signed short* leftend;
// Step in mixbuffer, left and right, thus two.
int step;
// Mixing channel index.
int chan;
if(lastswap==swap)
return;
lastswap=swap;
// Left and right channel
// are in global mixbuffer, alternating.
leftout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2);
rightout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2)+1;
leftout = mixbuffer;
rightout = mixbuffer +1;
step = 2;
// Determine end, for left channel only
// (right channel is implicit).
leftend = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2) + SAMPLECOUNT*step;
leftend = mixbuffer + SAMPLECOUNT*step;
// Mix sounds into the mixing buffer.
// Loop over step*SAMPLECOUNT,
@ -467,15 +461,10 @@ void I_UpdateSound( void )
void get_more(unsigned char** start, size_t* size)
{
// This code works fine, the only problem is that doom runs slower than the sound
// updates (sometimes). This code forces the update if the sound hasn't been
// remixed.
if(lastswap!=swap)
I_UpdateSound(); // Force sound update (We don't want stutters)
I_UpdateSound(); // Force sound update
*start = (unsigned char*)((swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2));
*start = (unsigned char*)(mixbuffer);
*size = SAMPLECOUNT*2*sizeof(short);
swap=!swap;
}
@ -520,9 +509,6 @@ void I_InitSound()
printf( " pre-cached all sound data\n");
if(mixbuffer==NULL)
mixbuffer=malloc(sizeof(short)*MIXBUFFERSIZE);
// Now initialize mixbuffer with zero.
for ( i = 0; i< MIXBUFFERSIZE; i++ )
mixbuffer[i] = 0;

View file

@ -16,7 +16,10 @@
* GNU General Public License for more details.
*
* $Log$
* Revision 1.15 2006/04/16 23:14:04 kkurbjun
* Revision 1.16 2006/04/20 19:39:56 kkurbjun
* Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more
*
* Revision 1.15 2006-04-16 23:14:04 kkurbjun
* Fix run so that it stays enabled across level loads. Removed some unused code and added some back in for hopeful future use.
*
* Revision 1.14 2006-04-15 22:08:36 kkurbjun
@ -359,10 +362,43 @@ static void I_UploadNewPalette(int pal)
void I_FinishUpdate (void)
{
#if (CONFIG_LCD == LCD_H300) && !defined(SIMULATOR)
/*
Lookup tables are no longer needed (H300 specific, decreases timedemo
by about 500 tics)
*/
#if 1
/* ASM screen update (drops 600 tics (100 asm)) */
asm (
"move.w #33,(%[LCD]) \n" /* Setup the LCD controller */
"clr.w (%[LCD2]) \n"
"move.w #34,(%[LCD]) \n" /* End LCD controller setup */
"move.l #220,%%d2 \n"
"move.l #176,%%d3 \n"
"clr.l %%d1 \n"
"widthloop: \n"
"move.b (%[screenptr])+, %%d1 \n" /* Unrolled by 5 */
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
"move.b (%[screenptr])+, %%d1 \n"
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
"move.b (%[screenptr])+, %%d1 \n"
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
"move.b (%[screenptr])+, %%d1 \n"
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
"move.b (%[screenptr])+, %%d1 \n"
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
"subq.l #5,%%d2 \n"
"bne widthloop \n"
"move.w #220,%%d2 \n"
"subq.l #1,%%d3 \n"
"bne widthloop \n"
: /* outputs */
: /* inputs */
[screenptr] "a" (d_screens[0]),
[palette] "a" (palette),
[LCD] "a" (0xf0000000),
[LCD2] "a" (0xf0000002)
: /* clobbers */
"d1", "d2", "d3"
);
#else
/* C version of above (drops 500 tics) */
// Start the write
*(volatile unsigned short *) 0xf0000000 = 0x21; // register
@ -383,6 +419,8 @@ void I_FinishUpdate (void)
wcnt=0;
hcnt++;
}
#endif
#else
unsigned char paletteIndex;
int x, y;

View file

@ -47,15 +47,15 @@ inline static int FixedMul( int a, int b )
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
// Code contributed by Thom Johansen
register int result;
asm volatile (
asm (
"mac.l %[x],%[y],%%acc0 \n" /* multiply */
"move.l %[y],%%d2 \n"
"mulu.l %[x],%%d2 \n" /* get lower half, avoid emac stall */
"movclr.l %%acc0,%[result] \n" /* get higher half */
"moveq.l #15,%%d1 \n"
"asl.l %%d1,%[result] \n" /* hi <<= 15, plus one free */
"moveq.l #16,%%d1 \n"
"lsr.l %%d1,%%d2 \n" /* (unsigned)lo >>= 16 */
"asl.l #8,%[result] \n" /* hi <<= 15, plus one free */
"asl.l #7,%[result] \n" /* hi <<= 15, plus one free */
"lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 16 */
"lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 16 */
"or.l %%d2 ,%[result] \n" /* combine result */
: /* outputs */
[result]"=&d"(result)
@ -63,7 +63,7 @@ inline static int FixedMul( int a, int b )
[x] "d" (a),
[y] "d" (b)
: /* clobbers */
"d1", "d2"
"d2"
);
return result;
#else

View file

@ -526,16 +526,48 @@ byte *ds_source IBSS_ATTR;
void R_DrawSpan (void)
{
register unsigned count,xfrac = ds_xfrac,yfrac = ds_yfrac;
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
// only slightly faster
asm volatile (
"tst %[count] \n"
"beq endspanloop \n"
"clr.l %%d4 \n"
"spanloop: \n"
"move.l %[xfrac], %%d1 \n"
"move.l %[yfrac], %%d2 \n"
"lsr.l #8,%%d1 \n"
"lsr.l #8,%%d2 \n"
"lsr.l #8,%%d1 \n"
"lsr.l #2,%%d2 \n"
"and.l #63,%%d1 \n"
"and.l #4032,%%d2 \n"
"or.l %%d2, %%d1 \n"
"move.b (%[source], %%d1), %%d4 \n"
"add.l %[ds_xstep], %[xfrac] \n"
"add.l %[ds_ystep], %[yfrac] \n"
"move.b (%[colormap],%%d4.l), (%[dest])+ \n"
"subq.l #1, %[count] \n"
"bne spanloop \n"
"endspanloop: \n"
: /* outputs */
: /* inputs */
[count] "d" (ds_x2-ds_x1+1),
[xfrac] "d" (ds_xfrac),
[yfrac] "d" (ds_yfrac),
[source] "a" (ds_source),
[colormap] "a" (ds_colormap),
[dest] "a" (topleft+ds_y*SCREENWIDTH +ds_x1),
[ds_xstep] "d" (ds_xstep),
[ds_ystep] "d" (ds_ystep)
: /* clobbers */
"d1", "d2", "d4"
);
#else
register unsigned count = ds_x2 - ds_x1 + 1,xfrac = ds_xfrac,yfrac = ds_yfrac;
byte *source;
byte *colormap;
byte *dest;
source = ds_source;
colormap = ds_colormap;
dest = topleft + ds_y*SCREENWIDTH + ds_x1;
count = ds_x2 - ds_x1 + 1;
register byte *source = ds_source;
register byte *colormap = ds_colormap;
register byte *dest = topleft + ds_y*SCREENWIDTH + ds_x1;
while (count)
{
@ -550,6 +582,7 @@ void R_DrawSpan (void)
*dest++ = colormap[source[spot]];
count--;
}
#endif
}
//