From 7c9bb608c9a15eacd10dae8d6f0d9fa76dc01787 Mon Sep 17 00:00:00 2001 From: ahodgkinson Date: Thu, 10 Aug 2006 21:33:06 +0000 Subject: [PATCH] FTK change. Added assembly routines for calculating checksums on SPARC platforms. git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@758 0109f412-320b-0410-ab79-c3e0c5ffbbe6 --- ftk/src/ftkcsum.cpp | 228 +++++++++++++++++++++++++++++++++++++++---- ftk/src/ftkunix.cpp | 20 ++-- ftk/util/ftktest.cpp | 18 +++- 3 files changed, 231 insertions(+), 35 deletions(-) diff --git a/ftk/src/ftkcsum.cpp b/ftk/src/ftkcsum.cpp index c78cd65..6802249 100644 --- a/ftk/src/ftkcsum.cpp +++ b/ftk/src/ftkcsum.cpp @@ -26,17 +26,16 @@ #include "ftksys.h" static FLMUINT32 * gv_pui32CRCTbl = NULL; +static FLMBOOL gv_bHaveFastCheckSum = FALSE; #if defined( FLM_X86) && (defined( FLM_GNUC) || defined( FLM_WIN) || defined( FLM_NLM)) - static unsigned long gv_mmxCheckSumFlag = 1; - #if defined( FLM_WATCOM_NLM) extern void ftkFastCheckSumMMX( void * pBlk, unsigned long * puiChecksum, - unsigned long * puiXORdata, + unsigned long * puiXOR, unsigned long uiNumberOfBytes); extern unsigned long ftkGetMMXSupported(void); @@ -46,13 +45,26 @@ static FLMUINT32 * gv_pui32CRCTbl = NULL; static void ftkFastCheckSumMMX( void * pBlk, unsigned long * puiChecksum, - unsigned long * puiXORdata, + unsigned long * puiXOR, unsigned long uiNumberOfBytes); static unsigned long ftkGetMMXSupported(void); #endif + +#elif defined( FLM_SPARC_PLUS) + extern "C" void sparc_calc_checksum( + void * pBlk, + unsigned long * puiChecksum, + unsigned long * puiXOR, + unsigned long uiNumberOfBytes); + + extern "C" void sparc_calc_xor( + void * pBlk, + unsigned long * puiXOR, + unsigned long uiNumberOfBytes); + #endif /******************************************************************** @@ -229,7 +241,7 @@ Desc: static void ftkFastCheckSumMMX( void * pBlk, unsigned long * puiChecksum, - unsigned long * puiXORdata, + unsigned long * puiXOR, unsigned long uiNumberOfBytes) { __asm @@ -241,7 +253,7 @@ static void ftkFastCheckSumMMX( mov eax, puiChecksum mov edx, [eax] and edx, 0ffh ;clear unneeded bits - mov eax, puiXORdata + mov eax, puiXOR mov ebx, [eax] and ebx, 0ffh ;clear unneeded bits mov ecx, uiNumberOfBytes @@ -349,7 +361,7 @@ Done: mov eax, puiChecksum mov [eax], edx - mov eax, puiXORdata + mov eax, puiXOR mov [eax], ebx } return; @@ -363,7 +375,7 @@ Desc: static void ftkFastCheckSumMMX( void * pBlk, unsigned long * puiChecksum, - unsigned long * puiXORdata, + unsigned long * puiXOR, unsigned long uiNumberOfBytes) { __asm__ __volatile__( @@ -465,8 +477,8 @@ static void ftkFastCheckSumMMX( " mov %1, %%eax\n" " mov %%ebx, (%%eax)\n" " pop %%ebx\n" - : "=m" (puiChecksum), "=m" (puiXORdata) - : "m" (pBlk), "m" (puiChecksum), "m" (puiXORdata), "m" (uiNumberOfBytes) + : "=m" (puiChecksum), "=m" (puiXOR) + : "m" (pBlk), "m" (puiChecksum), "m" (puiXOR), "m" (uiNumberOfBytes) : "%eax", "%ecx", "%edx", "%esi", "%edi"); } #endif @@ -478,7 +490,7 @@ Desc: static void ftkFastCheckSumMMX( void * pBlk, unsigned long * puiChecksum, - unsigned long * puiXORdata, + unsigned long * puiXOR, unsigned long uiNumberOfBytes) { __asm__ __volatile__( @@ -578,12 +590,178 @@ static void ftkFastCheckSumMMX( " mov %1, %%r9\n" " mov %%ebx, (%%r9)\n" - : "=m" (puiChecksum), "=m" (puiXORdata) - : "m" (pBlk), "m" (puiChecksum), "m" (puiXORdata), "m" (uiNumberOfBytes) + : "=m" (puiChecksum), "=m" (puiXOR) + : "m" (pBlk), "m" (puiChecksum), "m" (puiXOR), "m" (uiNumberOfBytes) : "%eax", "%ebx", "%ecx", "%edi", "%edx", "%r8", "%r9"); } #endif +/**************************************************************************** +Desc: +****************************************************************************/ +#if defined( FLM_SPARC_PLUS) +void sparc_csum_code( void) +{ + // Calculate the sum and xor bytes of a checksum + + asm( ".global sparc_calc_checksum"); +#ifdef FLM_64BIT + asm( ".align 8"); +#else + asm( ".align 4"); +#endif + asm( "sparc_calc_checksum:"); + asm( " save %sp, -96, %sp"); + +#ifdef FLM_64BIT + asm( " ldx [%i1], %l1"); + asm( " ldx [%i2], %l2"); +#else + asm( " ld [%i1], %l1"); + asm( " ld [%i2], %l2"); +#endif + + asm( " mov %i0, %l0"); + asm( " mov %i3, %l3"); + + asm( " clr %l4"); + + asm( " csum_loop:"); + asm( " ldub [%l0], %l4"); + asm( " add %l4, %l1, %l1"); + asm( " xor %l4, %l2, %l2"); + asm( " inc %l0"); + asm( " subcc %l3, 1, %l3"); + asm( " bg csum_loop"); + asm( " nop"); + + asm( " and %l1, 0xFF, %l1"); + asm( " and %l2, 0xFF, %l2"); +#ifdef FLM_64BIT + asm( " stx %l1, [%i1]"); + asm( " stx %l2, [%i2]"); +#else + asm( " st %l1, [%i1]"); + asm( " st %l2, [%i2]"); +#endif + asm( " ret"); + asm( " restore"); + asm( ".type sparc_calc_checksum, #function"); + asm( ".size sparc_calc_checksum, (.-sparc_calc_checksum)"); + + // Calculate the xor byte of a checksum + + asm( ".global sparc_calc_xor"); +#ifdef FLM_64BIT + asm( ".align 8"); +#else + asm( ".align 4"); +#endif + asm( "sparc_calc_xor:"); + asm( " save %sp, -96, %sp"); + + asm( " mov %i0, %l0"); +#ifdef FLM_64BIT + asm( " ldx [%i1], %l1"); +#else + asm( " ld [%i1], %l1"); +#endif + + asm( " clr %l3"); + + asm( " mov %i2, %l2"); +#ifdef FLM_64BIT + asm( " and %l2, 0x7, %l2"); +#else + asm( " and %l2, 0x3, %l2"); +#endif + asm( " cmp %l2, 0"); + asm( " be init_main_xor_loop"); + asm( " nop"); + + asm( " lead_xor_loop:"); +#ifdef FLM_64BIT + asm( " ldx [%l0], %l3"); +#else + asm( " ld [%l0], %l3"); +#endif + asm( " xor %l3, %l1, %l1"); + asm( " inc %l0"); + asm( " deccc %l2"); + asm( " bg lead_xor_loop"); + asm( " nop"); + + asm( " init_main_xor_loop:"); + asm( " mov %i2, %l2"); +#ifdef FLM_64BIT + asm( " andn %l2, 0x7, %l2"); + asm( " cmp %l2, 8"); +#else + asm( " andn %l2, 0x3, %l2"); + asm( " cmp %l2, 4"); +#endif + asm( " bl init_tail_xor_loop"); + asm( " nop"); + + asm( " main_xor_loop:"); +#ifdef FLM_64BIT + asm( " ldx [%l0], %l3"); +#else + asm( " ld [%l0], %l3"); +#endif + asm( " xor %l3, %l1, %l1"); +#ifdef FLM_64BIT + asm( " add %l0, 8, %l0"); + asm( " subcc %l2, 8, %l2"); +#else + asm( " add %l0, 4, %l0"); + asm( " subcc %l2, 4, %l2"); +#endif + asm( " bg main_xor_loop"); + asm( " nop"); + + asm( " init_tail_xor_loop:"); + asm( " cmp %i2, 0"); + asm( " be done"); + + asm( " tail_xor_loop:"); +#ifdef FLM_64BIT + asm( " ldx [%l0], %l3"); +#else + asm( " ld [%l0], %l3"); +#endif + asm( " xor %l3, %l1, %l1"); + asm( " inc %l0"); + asm( " deccc %l2"); + asm( " bg tail_xor_loop"); + asm( " nop"); + + asm( " done:"); + asm( " mov %l1, %l3"); +#ifdef FLM_64BIT + asm( " mov 7, %l2"); +#else + asm( " mov 3, %l2"); +#endif + asm( " xor_assemble_loop:"); + asm( " srlx %l3, 8, %l3"); + asm( " xor %l3, %l1, %l1"); + asm( " subcc %l2, 1, %l2"); + asm( " bg xor_assemble_loop"); + asm( " nop"); + asm( " and %l1, 0xFF, %l1"); +#ifdef FLM_64BIT + asm( " stx %l1, [%i1]"); +#else + asm( " st %l1, [%i1]"); +#endif + asm( " ret"); + asm( " restore"); + asm( ".type sparc_calc_xor, #function"); + asm( ".size sparc_calc_xor, (.-sparc_calc_xor)"); +} +#endif + /****************************************************************************** Desc: Sets the global variable to check if MMX instructions are allowed. ******************************************************************************/ @@ -599,7 +777,9 @@ void f_initFastCheckSum( void) // can do MMX instructions - unless you can assume that even on NT you // will be on at least a P5. - gv_mmxCheckSumFlag = ftkGetMMXSupported(); + gv_bHaveFastCheckSum = ftkGetMMXSupported() ? TRUE : FALSE; +#elif defined( FLM_SPARC_PLUS) + gv_bHaveFastCheckSum = TRUE; #endif } @@ -630,13 +810,16 @@ FLMUINT32 FLMAPI f_calcFastChecksum( } #if defined( FLM_X86) && (defined( FLM_GNUC) || defined( FLM_WIN) || defined( FLM_NLM)) - if( gv_mmxCheckSumFlag == 1) + if( gv_bHaveFastCheckSum) { ftkFastCheckSumMMX( (void *) pucData, (unsigned long *) &uiAdds, (unsigned long *) &uiXORs, (unsigned long) uiLength); } - else +#elif defined( FLM_SPARC_PLUS) + sparc_calc_checksum( pucData, &uiAdds, &uiXORs, uiLength); #endif + + if( !gv_bHaveFastCheckSum) { FLMBYTE * pucCur = pucData; FLMBYTE * pucEnd = pucData + uiLength; @@ -767,18 +950,21 @@ FLMBYTE FLMAPI f_calcPacketChecksum( FLMUINT uiBytesToChecksum) { FLMUINT uiChecksum = 0; + unsigned long uiAdds = 0; + unsigned long uiXORs = 0; #if defined( FLM_X86) && (defined( FLM_GNUC) || defined( FLM_WIN) || defined( FLM_NLM)) - if( gv_mmxCheckSumFlag == 1) + if( gv_bHaveFastCheckSum) { - unsigned long uiAdds = 0; - unsigned long uiXORs = 0; - ftkFastCheckSumMMX( pucPacket, &uiAdds, &uiXORs, uiBytesToChecksum); uiChecksum = uiXORs; } - else +#elif defined( FLM_SPARC_PLUS) + sparc_calc_xor( pucPacket, &uiXORs, uiBytesToChecksum); + uiChecksum = uiXORs; #endif + + if( !gv_bHaveFastCheckSum) { FLMBYTE * pucEnd; FLMBYTE * pucSectionEnd; diff --git a/ftk/src/ftkunix.cpp b/ftk/src/ftkunix.cpp index 8aaf83d..985045a 100644 --- a/ftk/src/ftkunix.cpp +++ b/ftk/src/ftkunix.cpp @@ -1323,13 +1323,13 @@ void sparc_asm_code( void) asm( ".type sparc_atomic_add_32, #function"); asm( "sparc_atomic_add_32:"); asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); - asm( " ld [%o0], %l0"); - asm( " add %l0, %o1, %l2"); - asm( " cas [%o0], %l0, %l2"); - asm( " cmp %l0, %l2"); + asm( " ld [%o0], %o2"); + asm( " add %o2, %o1, %o3"); + asm( " cas [%o0], %o2, %o3"); + asm( " cmp %o2, %o3"); asm( " bne sparc_atomic_add_32"); asm( " nop"); - asm( " add %l2, %o1, %o0"); + asm( " add %o3, %o1, %o0"); asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); asm( "retl"); asm( "nop"); @@ -1339,13 +1339,13 @@ void sparc_asm_code( void) asm( ".type sparc_atomic_xchg_32, #function"); asm( "sparc_atomic_xchg_32:"); asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); - asm( " ld [%o0], %l0"); - asm( " mov %o1, %l1"); - asm( " cas [%o0], %l0, %l1"); - asm( " cmp %l0, %l1"); + asm( " ld [%o0], %o2"); + asm( " mov %o1, %o3"); + asm( " cas [%o0], %o2, %o3"); + asm( " cmp %o2, %o3"); asm( " bne sparc_atomic_xchg_32"); asm( " nop"); - asm( " mov %l0, %o0"); + asm( " mov %o2, %o0"); asm( " membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad"); asm( "retl"); asm( "nop"); diff --git a/ftk/util/ftktest.cpp b/ftk/util/ftktest.cpp index 434e0d0..6669515 100644 --- a/ftk/util/ftktest.cpp +++ b/ftk/util/ftktest.cpp @@ -409,7 +409,7 @@ FSTATIC RCODE ftkFastChecksumTest( void) FLMUINT uiSlowTime = 0; FLMUINT uiFastTime = 0; - f_printf( "Running checksum tests ... "); + f_printf( "Running checksum tests "); uiDataLength = 8192; if( RC_BAD( rc = f_alloc( uiDataLength, &pucData))) @@ -466,9 +466,14 @@ FSTATIC RCODE ftkFastChecksumTest( void) rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE); goto Exit; } + + if( (uiIter % 100) == 0) + { + f_printf( "."); + } } - f_printf( "Slow time = %u ms, Fast time = %u ms. ", + f_printf( " Slow time = %u ms, Fast time = %u ms. ", (unsigned)FLM_TIMER_UNITS_TO_MILLI( uiSlowTime), (unsigned)FLM_TIMER_UNITS_TO_MILLI( uiFastTime)); @@ -501,7 +506,7 @@ FSTATIC RCODE ftkPacketChecksumTest( void) FLMUINT uiSlowTime = 0; FLMUINT uiFastTime = 0; - f_printf( "Running checksum tests ... "); + f_printf( "Running checksum tests "); uiDataLength = 64 * 1024; if( RC_BAD( rc = f_alloc( uiDataLength, &pucData))) @@ -535,9 +540,14 @@ FSTATIC RCODE ftkPacketChecksumTest( void) rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE); goto Exit; } + + if( (uiIter % 100) == 0) + { + f_printf( "."); + } } - f_printf( "Slow time = %u ms, Fast time = %u ms. ", + f_printf( " Slow time = %u ms, Fast time = %u ms. ", (unsigned)FLM_TIMER_UNITS_TO_MILLI( uiSlowTime), (unsigned)FLM_TIMER_UNITS_TO_MILLI( uiFastTime));