Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9747 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
9e9921b087
commit
29ab31e8f1
4 changed files with 106 additions and 49 deletions
|
@ -48,11 +48,11 @@
|
|||
// mixing buffer, and the samplerate of the raw data.
|
||||
|
||||
// Needed for calling the actual sound output.
|
||||
#define SAMPLECOUNT 512
|
||||
#define SAMPLECOUNT 512
|
||||
|
||||
#define NUM_CHANNELS 16
|
||||
#define NUM_CHANNELS 16
|
||||
// It is 2 for 16bit, and 2 for two channels.
|
||||
#define BUFMUL 4
|
||||
#define BUFMUL 2
|
||||
#define MIXBUFFERSIZE (SAMPLECOUNT*BUFMUL)
|
||||
|
||||
#if (CONFIG_KEYPAD == IPOD_3G_PAD) || (CONFIG_KEYPAD == IPOD_4G_PAD)
|
||||
|
@ -66,7 +66,7 @@
|
|||
// Basically, samples from all active internal channels
|
||||
// are modified and added, and stored in the buffer
|
||||
// that is submitted to the audio device.
|
||||
signed short *mixbuffer=NULL;
|
||||
signed short mixbuffer[MIXBUFFERSIZE] IBSS_ATTR;
|
||||
|
||||
typedef struct {
|
||||
// SFX id of the playing sound effect.
|
||||
|
@ -91,7 +91,7 @@ typedef struct {
|
|||
int *rightvol_lookup;
|
||||
} channel_info_t;
|
||||
|
||||
channel_info_t channelinfo[NUM_CHANNELS];
|
||||
channel_info_t channelinfo[NUM_CHANNELS] IBSS_ATTR;
|
||||
|
||||
int *vol_lookup; // Volume lookups.
|
||||
|
||||
|
@ -355,13 +355,6 @@ int I_SoundIsPlaying(int handle)
|
|||
// This function currently supports only 16bit.
|
||||
//
|
||||
|
||||
bool swap=0;
|
||||
bool lastswap=1;
|
||||
// Pointers in global mixbuffer, left, right, end.
|
||||
signed short* leftout;
|
||||
signed short* rightout;
|
||||
signed short* leftend;
|
||||
|
||||
void I_UpdateSound( void )
|
||||
{
|
||||
// Mix current sound data.
|
||||
|
@ -370,25 +363,26 @@ void I_UpdateSound( void )
|
|||
register int dl;
|
||||
register int dr;
|
||||
|
||||
// Pointers in global mixbuffer, left, right, end.
|
||||
signed short* leftout;
|
||||
signed short* rightout;
|
||||
signed short* leftend;
|
||||
|
||||
// Step in mixbuffer, left and right, thus two.
|
||||
int step;
|
||||
|
||||
// Mixing channel index.
|
||||
int chan;
|
||||
|
||||
if(lastswap==swap)
|
||||
return;
|
||||
lastswap=swap;
|
||||
|
||||
// Left and right channel
|
||||
// are in global mixbuffer, alternating.
|
||||
leftout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2);
|
||||
rightout = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2)+1;
|
||||
leftout = mixbuffer;
|
||||
rightout = mixbuffer +1;
|
||||
step = 2;
|
||||
|
||||
// Determine end, for left channel only
|
||||
// (right channel is implicit).
|
||||
leftend = (swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2) + SAMPLECOUNT*step;
|
||||
leftend = mixbuffer + SAMPLECOUNT*step;
|
||||
|
||||
// Mix sounds into the mixing buffer.
|
||||
// Loop over step*SAMPLECOUNT,
|
||||
|
@ -467,15 +461,10 @@ void I_UpdateSound( void )
|
|||
|
||||
void get_more(unsigned char** start, size_t* size)
|
||||
{
|
||||
// This code works fine, the only problem is that doom runs slower than the sound
|
||||
// updates (sometimes). This code forces the update if the sound hasn't been
|
||||
// remixed.
|
||||
if(lastswap!=swap)
|
||||
I_UpdateSound(); // Force sound update (We don't want stutters)
|
||||
I_UpdateSound(); // Force sound update
|
||||
|
||||
*start = (unsigned char*)((swap ? mixbuffer : mixbuffer + SAMPLECOUNT*2));
|
||||
*start = (unsigned char*)(mixbuffer);
|
||||
*size = SAMPLECOUNT*2*sizeof(short);
|
||||
swap=!swap;
|
||||
}
|
||||
|
||||
|
||||
|
@ -520,9 +509,6 @@ void I_InitSound()
|
|||
|
||||
printf( " pre-cached all sound data\n");
|
||||
|
||||
if(mixbuffer==NULL)
|
||||
mixbuffer=malloc(sizeof(short)*MIXBUFFERSIZE);
|
||||
|
||||
// Now initialize mixbuffer with zero.
|
||||
for ( i = 0; i< MIXBUFFERSIZE; i++ )
|
||||
mixbuffer[i] = 0;
|
||||
|
|
|
@ -16,7 +16,10 @@
|
|||
* GNU General Public License for more details.
|
||||
*
|
||||
* $Log$
|
||||
* Revision 1.15 2006/04/16 23:14:04 kkurbjun
|
||||
* Revision 1.16 2006/04/20 19:39:56 kkurbjun
|
||||
* Optimizations for doom: coldfire asm drawspan routine = not much, fixed point multiply changes = not much, H300 asm lcd update = some, IRAM sound updates and simplifications = more
|
||||
*
|
||||
* Revision 1.15 2006-04-16 23:14:04 kkurbjun
|
||||
* Fix run so that it stays enabled across level loads. Removed some unused code and added some back in for hopeful future use.
|
||||
*
|
||||
* Revision 1.14 2006-04-15 22:08:36 kkurbjun
|
||||
|
@ -359,10 +362,43 @@ static void I_UploadNewPalette(int pal)
|
|||
void I_FinishUpdate (void)
|
||||
{
|
||||
#if (CONFIG_LCD == LCD_H300) && !defined(SIMULATOR)
|
||||
/*
|
||||
Lookup tables are no longer needed (H300 specific, decreases timedemo
|
||||
by about 500 tics)
|
||||
*/
|
||||
|
||||
#if 1
|
||||
/* ASM screen update (drops 600 tics (100 asm)) */
|
||||
asm (
|
||||
"move.w #33,(%[LCD]) \n" /* Setup the LCD controller */
|
||||
"clr.w (%[LCD2]) \n"
|
||||
"move.w #34,(%[LCD]) \n" /* End LCD controller setup */
|
||||
"move.l #220,%%d2 \n"
|
||||
"move.l #176,%%d3 \n"
|
||||
"clr.l %%d1 \n"
|
||||
"widthloop: \n"
|
||||
"move.b (%[screenptr])+, %%d1 \n" /* Unrolled by 5 */
|
||||
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
|
||||
"move.b (%[screenptr])+, %%d1 \n"
|
||||
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
|
||||
"move.b (%[screenptr])+, %%d1 \n"
|
||||
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
|
||||
"move.b (%[screenptr])+, %%d1 \n"
|
||||
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
|
||||
"move.b (%[screenptr])+, %%d1 \n"
|
||||
"move.w (%[palette], %%d1.l:2), (%[LCD2]) \n"
|
||||
"subq.l #5,%%d2 \n"
|
||||
"bne widthloop \n"
|
||||
"move.w #220,%%d2 \n"
|
||||
"subq.l #1,%%d3 \n"
|
||||
"bne widthloop \n"
|
||||
: /* outputs */
|
||||
: /* inputs */
|
||||
[screenptr] "a" (d_screens[0]),
|
||||
[palette] "a" (palette),
|
||||
[LCD] "a" (0xf0000000),
|
||||
[LCD2] "a" (0xf0000002)
|
||||
: /* clobbers */
|
||||
"d1", "d2", "d3"
|
||||
);
|
||||
#else
|
||||
/* C version of above (drops 500 tics) */
|
||||
|
||||
// Start the write
|
||||
*(volatile unsigned short *) 0xf0000000 = 0x21; // register
|
||||
|
@ -383,6 +419,8 @@ void I_FinishUpdate (void)
|
|||
wcnt=0;
|
||||
hcnt++;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
unsigned char paletteIndex;
|
||||
int x, y;
|
||||
|
|
|
@ -47,15 +47,15 @@ inline static int FixedMul( int a, int b )
|
|||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
|
||||
// Code contributed by Thom Johansen
|
||||
register int result;
|
||||
asm volatile (
|
||||
asm (
|
||||
"mac.l %[x],%[y],%%acc0 \n" /* multiply */
|
||||
"move.l %[y],%%d2 \n"
|
||||
"mulu.l %[x],%%d2 \n" /* get lower half, avoid emac stall */
|
||||
"movclr.l %%acc0,%[result] \n" /* get higher half */
|
||||
"moveq.l #15,%%d1 \n"
|
||||
"asl.l %%d1,%[result] \n" /* hi <<= 15, plus one free */
|
||||
"moveq.l #16,%%d1 \n"
|
||||
"lsr.l %%d1,%%d2 \n" /* (unsigned)lo >>= 16 */
|
||||
"asl.l #8,%[result] \n" /* hi <<= 15, plus one free */
|
||||
"asl.l #7,%[result] \n" /* hi <<= 15, plus one free */
|
||||
"lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 16 */
|
||||
"lsr.l #8,%%d2 \n" /* (unsigned)lo >>= 16 */
|
||||
"or.l %%d2 ,%[result] \n" /* combine result */
|
||||
: /* outputs */
|
||||
[result]"=&d"(result)
|
||||
|
@ -63,7 +63,7 @@ inline static int FixedMul( int a, int b )
|
|||
[x] "d" (a),
|
||||
[y] "d" (b)
|
||||
: /* clobbers */
|
||||
"d1", "d2"
|
||||
"d2"
|
||||
);
|
||||
return result;
|
||||
#else
|
||||
|
|
|
@ -526,16 +526,48 @@ byte *ds_source IBSS_ATTR;
|
|||
|
||||
void R_DrawSpan (void)
|
||||
{
|
||||
register unsigned count,xfrac = ds_xfrac,yfrac = ds_yfrac;
|
||||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
|
||||
// only slightly faster
|
||||
asm volatile (
|
||||
"tst %[count] \n"
|
||||
"beq endspanloop \n"
|
||||
"clr.l %%d4 \n"
|
||||
"spanloop: \n"
|
||||
"move.l %[xfrac], %%d1 \n"
|
||||
"move.l %[yfrac], %%d2 \n"
|
||||
"lsr.l #8,%%d1 \n"
|
||||
"lsr.l #8,%%d2 \n"
|
||||
"lsr.l #8,%%d1 \n"
|
||||
"lsr.l #2,%%d2 \n"
|
||||
"and.l #63,%%d1 \n"
|
||||
"and.l #4032,%%d2 \n"
|
||||
"or.l %%d2, %%d1 \n"
|
||||
"move.b (%[source], %%d1), %%d4 \n"
|
||||
"add.l %[ds_xstep], %[xfrac] \n"
|
||||
"add.l %[ds_ystep], %[yfrac] \n"
|
||||
"move.b (%[colormap],%%d4.l), (%[dest])+ \n"
|
||||
"subq.l #1, %[count] \n"
|
||||
"bne spanloop \n"
|
||||
"endspanloop: \n"
|
||||
: /* outputs */
|
||||
: /* inputs */
|
||||
[count] "d" (ds_x2-ds_x1+1),
|
||||
[xfrac] "d" (ds_xfrac),
|
||||
[yfrac] "d" (ds_yfrac),
|
||||
[source] "a" (ds_source),
|
||||
[colormap] "a" (ds_colormap),
|
||||
[dest] "a" (topleft+ds_y*SCREENWIDTH +ds_x1),
|
||||
[ds_xstep] "d" (ds_xstep),
|
||||
[ds_ystep] "d" (ds_ystep)
|
||||
: /* clobbers */
|
||||
"d1", "d2", "d4"
|
||||
);
|
||||
#else
|
||||
register unsigned count = ds_x2 - ds_x1 + 1,xfrac = ds_xfrac,yfrac = ds_yfrac;
|
||||
|
||||
byte *source;
|
||||
byte *colormap;
|
||||
byte *dest;
|
||||
|
||||
source = ds_source;
|
||||
colormap = ds_colormap;
|
||||
dest = topleft + ds_y*SCREENWIDTH + ds_x1;
|
||||
count = ds_x2 - ds_x1 + 1;
|
||||
register byte *source = ds_source;
|
||||
register byte *colormap = ds_colormap;
|
||||
register byte *dest = topleft + ds_y*SCREENWIDTH + ds_x1;
|
||||
|
||||
while (count)
|
||||
{
|
||||
|
@ -550,6 +582,7 @@ void R_DrawSpan (void)
|
|||
*dest++ = colormap[source[spot]];
|
||||
count--;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
|
|
Loading…
Reference in a new issue