FTK change. Added assembly routines for calculating checksums on SPARC platforms.
git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@758 0109f412-320b-0410-ab79-c3e0c5ffbbe6
This commit is contained in:
@@ -26,17 +26,16 @@
|
||||
#include "ftksys.h"
|
||||
|
||||
static FLMUINT32 * gv_pui32CRCTbl = NULL;
|
||||
static FLMBOOL gv_bHaveFastCheckSum = FALSE;
|
||||
|
||||
#if defined( FLM_X86) && (defined( FLM_GNUC) || defined( FLM_WIN) || defined( FLM_NLM))
|
||||
|
||||
static unsigned long gv_mmxCheckSumFlag = 1;
|
||||
|
||||
#if defined( FLM_WATCOM_NLM)
|
||||
|
||||
extern void ftkFastCheckSumMMX(
|
||||
void * pBlk,
|
||||
unsigned long * puiChecksum,
|
||||
unsigned long * puiXORdata,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes);
|
||||
|
||||
extern unsigned long ftkGetMMXSupported(void);
|
||||
@@ -46,13 +45,26 @@ static FLMUINT32 * gv_pui32CRCTbl = NULL;
|
||||
static void ftkFastCheckSumMMX(
|
||||
void * pBlk,
|
||||
unsigned long * puiChecksum,
|
||||
unsigned long * puiXORdata,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes);
|
||||
|
||||
static unsigned long ftkGetMMXSupported(void);
|
||||
|
||||
#endif
|
||||
|
||||
#elif defined( FLM_SPARC_PLUS)
|
||||
|
||||
// Assembly entry point; the label is emitted by the asm() statements inside
// sparc_csum_code(), hence the C linkage.
// pBlk            - first byte of the data to process.
// puiChecksum     - in/out: read on entry, every byte of the block is added
//                   to it, and the sum masked to its low 8 bits is stored back.
// puiXOR          - in/out: read on entry, every byte of the block is XORed
//                   into it, and the result masked to its low 8 bits is
//                   stored back.
// uiNumberOfBytes - number of bytes to process starting at pBlk.
extern "C" void sparc_calc_checksum(
|
||||
void * pBlk,
|
||||
unsigned long * puiChecksum,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes);
|
||||
|
||||
// Assembly entry point; the label is emitted by the asm() statements inside
// sparc_csum_code(), hence the C linkage.
// pBlk            - first byte of the data to process.
// puiXOR          - in/out: read on entry as the starting value; the result,
//                   folded and masked to its low 8 bits, is stored back.
// uiNumberOfBytes - number of bytes to process starting at pBlk.
// NOTE(review): the intent appears to be "XOR of every byte in the block",
// computed a machine word at a time - confirm against the assembly.
extern "C" void sparc_calc_xor(
|
||||
void * pBlk,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes);
|
||||
|
||||
#endif
|
||||
|
||||
/********************************************************************
|
||||
@@ -229,7 +241,7 @@ Desc:
|
||||
static void ftkFastCheckSumMMX(
|
||||
void * pBlk,
|
||||
unsigned long * puiChecksum,
|
||||
unsigned long * puiXORdata,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes)
|
||||
{
|
||||
__asm
|
||||
@@ -241,7 +253,7 @@ static void ftkFastCheckSumMMX(
|
||||
mov eax, puiChecksum
|
||||
mov edx, [eax]
|
||||
and edx, 0ffh ;clear unneeded bits
|
||||
mov eax, puiXORdata
|
||||
mov eax, puiXOR
|
||||
mov ebx, [eax]
|
||||
and ebx, 0ffh ;clear unneeded bits
|
||||
mov ecx, uiNumberOfBytes
|
||||
@@ -349,7 +361,7 @@ Done:
|
||||
mov eax, puiChecksum
|
||||
mov [eax], edx
|
||||
|
||||
mov eax, puiXORdata
|
||||
mov eax, puiXOR
|
||||
mov [eax], ebx
|
||||
}
|
||||
return;
|
||||
@@ -363,7 +375,7 @@ Desc:
|
||||
static void ftkFastCheckSumMMX(
|
||||
void * pBlk,
|
||||
unsigned long * puiChecksum,
|
||||
unsigned long * puiXORdata,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
@@ -465,8 +477,8 @@ static void ftkFastCheckSumMMX(
|
||||
" mov %1, %%eax\n"
|
||||
" mov %%ebx, (%%eax)\n"
|
||||
" pop %%ebx\n"
|
||||
: "=m" (puiChecksum), "=m" (puiXORdata)
|
||||
: "m" (pBlk), "m" (puiChecksum), "m" (puiXORdata), "m" (uiNumberOfBytes)
|
||||
: "=m" (puiChecksum), "=m" (puiXOR)
|
||||
: "m" (pBlk), "m" (puiChecksum), "m" (puiXOR), "m" (uiNumberOfBytes)
|
||||
: "%eax", "%ecx", "%edx", "%esi", "%edi");
|
||||
}
|
||||
#endif
|
||||
@@ -478,7 +490,7 @@ Desc:
|
||||
static void ftkFastCheckSumMMX(
|
||||
void * pBlk,
|
||||
unsigned long * puiChecksum,
|
||||
unsigned long * puiXORdata,
|
||||
unsigned long * puiXOR,
|
||||
unsigned long uiNumberOfBytes)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
@@ -578,12 +590,178 @@ static void ftkFastCheckSumMMX(
|
||||
|
||||
" mov %1, %%r9\n"
|
||||
" mov %%ebx, (%%r9)\n"
|
||||
: "=m" (puiChecksum), "=m" (puiXORdata)
|
||||
: "m" (pBlk), "m" (puiChecksum), "m" (puiXORdata), "m" (uiNumberOfBytes)
|
||||
: "=m" (puiChecksum), "=m" (puiXOR)
|
||||
: "m" (pBlk), "m" (puiChecksum), "m" (puiXOR), "m" (uiNumberOfBytes)
|
||||
: "%eax", "%ebx", "%ecx", "%edi", "%edx", "%r8", "%r9");
|
||||
}
|
||||
#endif
|
||||
|
||||
/****************************************************************************
Desc:	Container for the hand-written SPARC checksum routines.

		This function is not meant to be called.  It only gives the compiler
		a place to emit the asm() statements below, which define two global
		entry points with C linkage:

		sparc_calc_checksum( pBlk, puiChecksum, puiXOR, uiNumberOfBytes)
			Adds every byte of the block into *puiChecksum and XORs every
			byte into *puiXOR.  Both values are read on entry, masked to
			their low 8 bits and written back on exit.

		sparc_calc_xor( pBlk, puiXOR, uiNumberOfBytes)
			XORs every byte of the block into *puiXOR.  Leading bytes are
			consumed one at a time until the data pointer is word aligned,
			whole words are then XORed into the accumulator, trailing bytes
			are consumed one at a time, and finally the accumulator is
			folded down to a single byte.

Notes:	* Byte-granular accesses always use ldub.  Word loads (ld/ldx) are
		  only issued on naturally aligned addresses, because SPARC traps
		  on misaligned loads.
		* Every branch is followed by an explicit delay-slot instruction.
		* 64-bit builds reserve the 176-byte minimum frame required by the
		  SPARC V9 ABI; 32-bit builds keep the 96-byte frame.
		* A length of zero touches no data bytes.
		* The branches test the 32-bit condition codes, so lengths are
		  assumed to be smaller than 2^31 bytes.
****************************************************************************/
#if defined( FLM_SPARC_PLUS)
void sparc_csum_code( void)
{
	// ---------------------------------------------------------------------
	// sparc_calc_checksum
	//
	//		%i0 = pBlk, %i1 = puiChecksum, %i2 = puiXOR, %i3 = uiNumberOfBytes
	//		%l0 = cursor, %l1 = running sum, %l2 = running xor,
	//		%l3 = bytes remaining, %l4 = current byte
	// ---------------------------------------------------------------------

	asm( ".global sparc_calc_checksum");
#ifdef FLM_64BIT
	asm( ".align 8");
#else
	asm( ".align 4");
#endif
	asm( "sparc_calc_checksum:");

#ifdef FLM_64BIT
	asm( "	save %sp, -176, %sp");
	asm( "	ldx [%i1], %l1");
	asm( "	ldx [%i2], %l2");
#else
	asm( "	save %sp, -96, %sp");
	asm( "	ld [%i1], %l1");
	asm( "	ld [%i2], %l2");
#endif

	asm( "	mov %i0, %l0");
	asm( "	mov %i3, %l3");
	asm( "	clr %l4");

	// Nothing to read when the block is empty

	asm( "	cmp %l3, 0");
	asm( "	be sparc_csum_store");
	asm( "	nop");

	asm( " sparc_csum_loop:");
	asm( "	ldub [%l0], %l4");
	asm( "	add %l4, %l1, %l1");
	asm( "	xor %l4, %l2, %l2");
	asm( "	inc %l0");
	asm( "	subcc %l3, 1, %l3");
	asm( "	bg sparc_csum_loop");
	asm( "	nop");

	// Only the low byte of each result is meaningful

	asm( " sparc_csum_store:");
	asm( "	and %l1, 0xFF, %l1");
	asm( "	and %l2, 0xFF, %l2");
#ifdef FLM_64BIT
	asm( "	stx %l1, [%i1]");
	asm( "	stx %l2, [%i2]");
#else
	asm( "	st %l1, [%i1]");
	asm( "	st %l2, [%i2]");
#endif
	asm( "	ret");
	asm( "	restore");
	asm( ".type sparc_calc_checksum, #function");
	asm( ".size sparc_calc_checksum, (.-sparc_calc_checksum)");

	// ---------------------------------------------------------------------
	// sparc_calc_xor
	//
	//		%i0 = pBlk, %i1 = puiXOR, %i2 = uiNumberOfBytes
	//		%l0 = cursor, %l1 = xor accumulator, %l2 = bytes remaining,
	//		%l3 = scratch (current byte / word)
	// ---------------------------------------------------------------------

	asm( ".global sparc_calc_xor");
#ifdef FLM_64BIT
	asm( ".align 8");
#else
	asm( ".align 4");
#endif
	asm( "sparc_calc_xor:");

#ifdef FLM_64BIT
	asm( "	save %sp, -176, %sp");
	asm( "	ldx [%i1], %l1");
#else
	asm( "	save %sp, -96, %sp");
	asm( "	ld [%i1], %l1");
#endif

	asm( "	mov %i0, %l0");
	asm( "	mov %i2, %l2");
	asm( "	clr %l3");

	// Leading bytes: consume single bytes until the cursor is word aligned
	// (or the data runs out).

	asm( " sparc_xor_lead_loop:");
	asm( "	cmp %l2, 0");
	asm( "	be sparc_xor_fold");
	asm( "	nop");
#ifdef FLM_64BIT
	asm( "	andcc %l0, 0x7, %g0");
#else
	asm( "	andcc %l0, 0x3, %g0");
#endif
	asm( "	be sparc_xor_main_check");
	asm( "	nop");
	asm( "	ldub [%l0], %l3");
	asm( "	xor %l3, %l1, %l1");
	asm( "	inc %l0");
	asm( "	dec %l2");
	asm( "	ba sparc_xor_lead_loop");
	asm( "	nop");

	// Main loop: the cursor is aligned, XOR one whole word per iteration
	// for as long as at least one full word remains.

	asm( " sparc_xor_main_check:");
#ifdef FLM_64BIT
	asm( "	cmp %l2, 8");
#else
	asm( "	cmp %l2, 4");
#endif
	asm( "	bl sparc_xor_tail_check");
	asm( "	nop");

	asm( " sparc_xor_main_loop:");
#ifdef FLM_64BIT
	asm( "	ldx [%l0], %l3");
	asm( "	xor %l3, %l1, %l1");
	asm( "	add %l0, 8, %l0");
	asm( "	sub %l2, 8, %l2");
	asm( "	cmp %l2, 8");
#else
	asm( "	ld [%l0], %l3");
	asm( "	xor %l3, %l1, %l1");
	asm( "	add %l0, 4, %l0");
	asm( "	sub %l2, 4, %l2");
	asm( "	cmp %l2, 4");
#endif
	asm( "	bge sparc_xor_main_loop");
	asm( "	nop");

	// Trailing bytes: fewer than one word is left, finish byte by byte

	asm( " sparc_xor_tail_check:");
	asm( "	cmp %l2, 0");
	asm( "	be sparc_xor_fold");
	asm( "	nop");

	asm( " sparc_xor_tail_loop:");
	asm( "	ldub [%l0], %l3");
	asm( "	xor %l3, %l1, %l1");
	asm( "	inc %l0");
	asm( "	deccc %l2");
	asm( "	bg sparc_xor_tail_loop");
	asm( "	nop");

	// Fold: XOR every byte of the accumulator into its low byte by
	// repeatedly shifting a copy right by eight bits.

	asm( " sparc_xor_fold:");
	asm( "	mov %l1, %l3");
#ifdef FLM_64BIT
	asm( "	mov 7, %l2");
#else
	asm( "	mov 3, %l2");
#endif
	asm( " sparc_xor_fold_loop:");
#ifdef FLM_64BIT
	asm( "	srlx %l3, 8, %l3");
#else
	// 32-bit shift: does not depend on the upper half of the register
	asm( "	srl %l3, 8, %l3");
#endif
	asm( "	xor %l3, %l1, %l1");
	asm( "	subcc %l2, 1, %l2");
	asm( "	bg sparc_xor_fold_loop");
	asm( "	nop");

	asm( "	and %l1, 0xFF, %l1");
#ifdef FLM_64BIT
	asm( "	stx %l1, [%i1]");
#else
	asm( "	st %l1, [%i1]");
#endif
	asm( "	ret");
	asm( "	restore");
	asm( ".type sparc_calc_xor, #function");
	asm( ".size sparc_calc_xor, (.-sparc_calc_xor)");
}
#endif
|
||||
|
||||
/******************************************************************************
|
||||
Desc: Sets the global variable to check if MMX instructions are allowed.
|
||||
******************************************************************************/
|
||||
@@ -599,7 +777,9 @@ void f_initFastCheckSum( void)
|
||||
// can do MMX instructions - unless you can assume that even on NT you
|
||||
// will be on at least a P5.
|
||||
|
||||
gv_mmxCheckSumFlag = ftkGetMMXSupported();
|
||||
gv_bHaveFastCheckSum = ftkGetMMXSupported() ? TRUE : FALSE;
|
||||
#elif defined( FLM_SPARC_PLUS)
|
||||
gv_bHaveFastCheckSum = TRUE;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -630,13 +810,16 @@ FLMUINT32 FLMAPI f_calcFastChecksum(
|
||||
}
|
||||
|
||||
#if defined( FLM_X86) && (defined( FLM_GNUC) || defined( FLM_WIN) || defined( FLM_NLM))
|
||||
if( gv_mmxCheckSumFlag == 1)
|
||||
if( gv_bHaveFastCheckSum)
|
||||
{
|
||||
ftkFastCheckSumMMX( (void *) pucData, (unsigned long *) &uiAdds,
|
||||
(unsigned long *) &uiXORs, (unsigned long) uiLength);
|
||||
}
|
||||
else
|
||||
#elif defined( FLM_SPARC_PLUS)
|
||||
sparc_calc_checksum( pucData, &uiAdds, &uiXORs, uiLength);
|
||||
#endif
|
||||
|
||||
if( !gv_bHaveFastCheckSum)
|
||||
{
|
||||
FLMBYTE * pucCur = pucData;
|
||||
FLMBYTE * pucEnd = pucData + uiLength;
|
||||
@@ -767,18 +950,21 @@ FLMBYTE FLMAPI f_calcPacketChecksum(
|
||||
FLMUINT uiBytesToChecksum)
|
||||
{
|
||||
FLMUINT uiChecksum = 0;
|
||||
unsigned long uiAdds = 0;
|
||||
unsigned long uiXORs = 0;
|
||||
|
||||
#if defined( FLM_X86) && (defined( FLM_GNUC) || defined( FLM_WIN) || defined( FLM_NLM))
|
||||
if( gv_mmxCheckSumFlag == 1)
|
||||
if( gv_bHaveFastCheckSum)
|
||||
{
|
||||
unsigned long uiAdds = 0;
|
||||
unsigned long uiXORs = 0;
|
||||
|
||||
ftkFastCheckSumMMX( pucPacket, &uiAdds, &uiXORs, uiBytesToChecksum);
|
||||
uiChecksum = uiXORs;
|
||||
}
|
||||
else
|
||||
#elif defined( FLM_SPARC_PLUS)
|
||||
sparc_calc_xor( pucPacket, &uiXORs, uiBytesToChecksum);
|
||||
uiChecksum = uiXORs;
|
||||
#endif
|
||||
|
||||
if( !gv_bHaveFastCheckSum)
|
||||
{
|
||||
FLMBYTE * pucEnd;
|
||||
FLMBYTE * pucSectionEnd;
|
||||
|
||||
@@ -1323,13 +1323,13 @@ void sparc_asm_code( void)
|
||||
asm( ".type sparc_atomic_add_32, #function");
|
||||
asm( "sparc_atomic_add_32:");
|
||||
asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad");
|
||||
asm( " ld [%o0], %l0");
|
||||
asm( " add %l0, %o1, %l2");
|
||||
asm( " cas [%o0], %l0, %l2");
|
||||
asm( " cmp %l0, %l2");
|
||||
asm( " ld [%o0], %o2");
|
||||
asm( " add %o2, %o1, %o3");
|
||||
asm( " cas [%o0], %o2, %o3");
|
||||
asm( " cmp %o2, %o3");
|
||||
asm( " bne sparc_atomic_add_32");
|
||||
asm( " nop");
|
||||
asm( " add %l2, %o1, %o0");
|
||||
asm( " add %o3, %o1, %o0");
|
||||
asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad");
|
||||
asm( "retl");
|
||||
asm( "nop");
|
||||
@@ -1339,13 +1339,13 @@ void sparc_asm_code( void)
|
||||
asm( ".type sparc_atomic_xchg_32, #function");
|
||||
asm( "sparc_atomic_xchg_32:");
|
||||
asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad");
|
||||
asm( " ld [%o0], %l0");
|
||||
asm( " mov %o1, %l1");
|
||||
asm( " cas [%o0], %l0, %l1");
|
||||
asm( " cmp %l0, %l1");
|
||||
asm( " ld [%o0], %o2");
|
||||
asm( " mov %o1, %o3");
|
||||
asm( " cas [%o0], %o2, %o3");
|
||||
asm( " cmp %o2, %o3");
|
||||
asm( " bne sparc_atomic_xchg_32");
|
||||
asm( " nop");
|
||||
asm( " mov %l0, %o0");
|
||||
asm( " mov %o2, %o0");
|
||||
asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad");
|
||||
asm( "retl");
|
||||
asm( "nop");
|
||||
|
||||
@@ -409,7 +409,7 @@ FSTATIC RCODE ftkFastChecksumTest( void)
|
||||
FLMUINT uiSlowTime = 0;
|
||||
FLMUINT uiFastTime = 0;
|
||||
|
||||
f_printf( "Running checksum tests ... ");
|
||||
f_printf( "Running checksum tests ");
|
||||
|
||||
uiDataLength = 8192;
|
||||
if( RC_BAD( rc = f_alloc( uiDataLength, &pucData)))
|
||||
@@ -466,9 +466,14 @@ FSTATIC RCODE ftkFastChecksumTest( void)
|
||||
rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if( (uiIter % 100) == 0)
|
||||
{
|
||||
f_printf( ".");
|
||||
}
|
||||
}
|
||||
|
||||
f_printf( "Slow time = %u ms, Fast time = %u ms. ",
|
||||
f_printf( " Slow time = %u ms, Fast time = %u ms. ",
|
||||
(unsigned)FLM_TIMER_UNITS_TO_MILLI( uiSlowTime),
|
||||
(unsigned)FLM_TIMER_UNITS_TO_MILLI( uiFastTime));
|
||||
|
||||
@@ -501,7 +506,7 @@ FSTATIC RCODE ftkPacketChecksumTest( void)
|
||||
FLMUINT uiSlowTime = 0;
|
||||
FLMUINT uiFastTime = 0;
|
||||
|
||||
f_printf( "Running checksum tests ... ");
|
||||
f_printf( "Running checksum tests ");
|
||||
|
||||
uiDataLength = 64 * 1024;
|
||||
if( RC_BAD( rc = f_alloc( uiDataLength, &pucData)))
|
||||
@@ -535,9 +540,14 @@ FSTATIC RCODE ftkPacketChecksumTest( void)
|
||||
rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if( (uiIter % 100) == 0)
|
||||
{
|
||||
f_printf( ".");
|
||||
}
|
||||
}
|
||||
|
||||
f_printf( "Slow time = %u ms, Fast time = %u ms. ",
|
||||
f_printf( " Slow time = %u ms, Fast time = %u ms. ",
|
||||
(unsigned)FLM_TIMER_UNITS_TO_MILLI( uiSlowTime),
|
||||
(unsigned)FLM_TIMER_UNITS_TO_MILLI( uiFastTime));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user