From 77111e2f9705093cc06f9d2c4d331e650a413a40 Mon Sep 17 00:00:00 2001 From: ahodgkinson Date: Fri, 5 May 2006 23:18:22 +0000 Subject: [PATCH] Added support for collation. git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@374 0109f412-320b-0410-ab79-c3e0c5ffbbe6 --- ftk/src/ftk.h | 606 ++++++++++++++++++++++++++++++-------------- ftk/src/ftkmisc.cpp | 471 ++++++++++++++++++++++++++++++++++ ftk/src/ftksys.h | 245 ++++++++++++++++++ ftk/src/ftktext.cpp | 365 +++++++++++++++++++++++--- ftk/src/ftkxml.cpp | 74 ------ 5 files changed, 1471 insertions(+), 290 deletions(-) diff --git a/ftk/src/ftk.h b/ftk/src/ftk.h index 23c49e9..a5cf390 100644 --- a/ftk/src/ftk.h +++ b/ftk/src/ftk.h @@ -387,7 +387,6 @@ /**************************************************************************** Forward References ****************************************************************************/ - flminterface IF_DataVector; flminterface IF_DirHdl; flminterface IF_FileHdl; flminterface IF_FileSystem; @@ -404,7 +403,7 @@ flminterface IF_IOBuffer; /**************************************************************************** - CROSS PLATFORM DEFINITIONS + Desc: Cross-platform definitions ****************************************************************************/ #ifndef NULL @@ -418,10 +417,13 @@ #ifndef FALSE #define FALSE 0 #endif + + #define f_offsetof(s,m) \ + (FLMSIZET)(FLMUINT)&(((s *)0)->m) - // Language definitions - to get rid of testing "US" or multiple bytes - // will define needed languages as a number with backward conversions. 
- // Keep these defines synchronized with the table in wps6cmpc.c + /**************************************************************************** + Desc: Language constants + ****************************************************************************/ #define FLM_US_LANG 0 // English, United States #define FLM_AF_LANG 1 // Afrikaans @@ -467,6 +469,23 @@ #define FLM_FIRST_DBCS_LANG (FLM_JP_LANG) #define FLM_LAST_DBCS_LANG (FLM_LA_LANG) + /**************************************************************************** + Desc: Collation flags and constants + ****************************************************************************/ + + #define HAD_SUB_COLLATION 0x01 // Set if had sub-collating values-diacritics + #define HAD_LOWER_CASE 0x02 // Set if you hit a lowercase character + #define COLL_FIRST_SUBSTRING 0x03 // First substring marker + #define COLL_MARKER 0x04 // Marks place of sub-collation + + #define SC_LOWER 0x00 // Only lowercase characters exist + #define SC_MIXED 0x01 // Lower/uppercase flags follow in next byte + #define SC_UPPER 0x02 // Only upper characters exist + #define SC_SUB_COL 0x03 // Sub-collation follows (diacritics|extCh) + + #define COLL_TRUNCATED 0x0C // This key piece has been truncated from original + #define MAX_COL_OPCODE COLL_TRUNCATED + /**************************************************************************** Desc: I/O Flags ****************************************************************************/ @@ -492,6 +511,10 @@ #define FLM_MAXIMUM_FILE_SIZE 0xFFFC0000 + // Maximum SEN (compressed number) length + + #define FLM_MAX_SEN_LEN 9 + // Retrieval flags #define FLM_INCL 0x0010 @@ -932,6 +955,56 @@ IF_IStream * pIStream, IF_OStream * pOStream); + /**************************************************************************** + Desc: + ****************************************************************************/ + + typedef struct + { + FLMUINT64 ui64Position; + FLMUNICODE uNextChar; + } F_CollStreamPos; + + flminterface 
IF_CollIStream : public IF_PosIStream + { + virtual RCODE FLMAPI open( + IF_PosIStream * pIStream, + FLMBOOL bUnicodeStream, + FLMUINT uiLanguage, + FLMUINT uiCompareRules, + FLMBOOL bMayHaveWildCards) = 0; + + virtual RCODE FLMAPI close( void) = 0; + + virtual RCODE FLMAPI read( + void * pvBuffer, + FLMUINT uiBytesToRead, + FLMUINT * puiBytesRead) = 0; + + virtual RCODE FLMAPI read( + FLMBOOL bAllowTwoIntoOne, + FLMUNICODE * puChar, + FLMBOOL * pbCharIsWild, + FLMUINT16 * pui16Col, + FLMUINT16 * pui16SubCol, + FLMBYTE * pucCase) = 0; + + virtual FLMUINT64 FLMAPI totalSize( void) = 0; + + virtual FLMUINT64 FLMAPI remainingSize( void) = 0; + + virtual RCODE FLMAPI positionTo( + FLMUINT64 ui64Position) = 0; + + virtual FLMUINT64 FLMAPI getCurrPosition( void) = 0; + + virtual RCODE FLMAPI positionTo( + F_CollStreamPos * pPos) = 0; + + virtual void FLMAPI getCurrPosition( + F_CollStreamPos * pPos) = 0; + }; + /**************************************************************************** Desc: ****************************************************************************/ @@ -1888,16 +1961,183 @@ const FLMBYTE * pszUTF8, FLMUINT * puiNumChars); + FLMBOOL FLMAPI f_isWhitespace( + FLMUNICODE ucChar); + + FLMUNICODE FLMAPI f_convertChar( + FLMUNICODE uzChar, + FLMUINT uiCompareRules); + + RCODE FLMAPI f_wpToUnicode( + FLMUINT16 ui16WPChar, + FLMUNICODE * puUniChar); + + FLMBOOL FLMAPI f_unicodeToWP( + FLMUNICODE uUniChar, + FLMUINT16 * pui16WPChar); + + RCODE FLMAPI f_wpCheckDoubleCollation( + IF_PosIStream * pIStream, + FLMBOOL bUnicodeStream, + FLMBOOL bAllowTwoIntoOne, + FLMUNICODE * puzChar, + FLMUNICODE * puzChar2, + FLMBOOL * pbTwoIntoOne, + FLMUINT uiLanguage); + + RCODE FLMAPI f_asiaColStr2WPStr( + const FLMBYTE * pucColStr, + FLMUINT uiColStrLen, + FLMBYTE * pucWPStr, + FLMUINT * puiWPStrLen, + FLMUINT * puiUnconvChars, + FLMBOOL * pbDataTruncated, + FLMBOOL * pbFirstSubstring); + + RCODE FLMAPI f_colStr2WPStr( + const FLMBYTE * pucColStr, + FLMUINT uiColStrLen, + 
FLMBYTE * pucWPStr, + FLMUINT * puiWPStrLen, + FLMUINT uiLang, + FLMUINT * puiUnconvChars, + FLMBOOL * pbDataTruncated, + FLMBOOL * pbFirstSubstring); + + RCODE FLMAPI f_asiaUTF8ToColText( + IF_PosIStream * pIStream, + FLMBYTE * pucColStr, + FLMUINT * puiColStrLen, + FLMBOOL bCaseInsensitive, + FLMUINT * puiCollationLen, + FLMUINT * puiCaseLen, + FLMUINT uiCharLimit, + FLMBOOL bFirstSubstring, + FLMBOOL bDataTruncated, + FLMBOOL * pbDataTruncated); + + RCODE FLMAPI f_compareUTF8Strings( + const FLMBYTE * pucLString, + FLMUINT uiLStrBytes, + FLMBOOL bLeftWild, + const FLMBYTE * pucRString, + FLMUINT uiRStrBytes, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult); + + RCODE FLMAPI f_compareUTF8Streams( + IF_PosIStream * pLStream, + FLMBOOL bLeftWild, + IF_PosIStream * pRStream, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult); + + RCODE FLMAPI f_compareUnicodeStrings( + const FLMUNICODE * puzLString, + FLMUINT uiLStrBytes, + FLMBOOL bLeftWild, + const FLMUNICODE * puzRString, + FLMUINT uiRStrBytes, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult); + + RCODE FLMAPI f_compareUnicodeStreams( + IF_PosIStream * pLStream, + FLMBOOL bLeftWild, + IF_PosIStream * pRStream, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult); + + RCODE FLMAPI f_compareCollStreams( + IF_CollIStream * pLStream, + IF_CollIStream * pRStream, + FLMBOOL bOpIsMatch, + FLMUINT uiLanguage, + FLMINT * piResult); + + RCODE FLMAPI f_utf8IsSubStr( + const FLMBYTE * pszString, + const FLMBYTE * pszSubString, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMBOOL * pbExists); + RCODE FLMAPI f_readUTF8CharAsUnicode( IF_IStream * pStream, FLMUNICODE * puChar); + RCODE FLMAPI f_readUTF8CharAsUTF8( + IF_IStream * pIStream, + FLMBYTE * pucBuf, + FLMUINT * puiLen); + RCODE FLMAPI f_formatUTF8Text( IF_PosIStream * pIStream, FLMBOOL bAllowEscapes, 
FLMUINT uiCompareRules, IF_DynaBuf * pDynaBuf); + RCODE FLMAPI f_getNextMetaphone( + IF_IStream * pIStream, + FLMUINT * puiMetaphone, + FLMUINT * puiAltMetaphone = NULL); + + FLMUINT FLMAPI f_getSENLength( + FLMBYTE ucByte); + + FLMUINT FLMAPI f_getSENByteCount( + FLMUINT64 ui64Num); + + FLMUINT FLMAPI f_encodeSEN( + FLMUINT64 ui64Value, + FLMBYTE ** ppucBuffer, + FLMUINT uiBytesWanted = 0); + + RCODE FLMAPI f_encodeSEN( + FLMUINT64 ui64Value, + FLMBYTE ** ppucBuffer, + FLMBYTE * pucEnd); + + FLMUINT FLMAPI f_encodeSENKnownLength( + FLMUINT64 ui64Value, + FLMUINT uiSenLen, + FLMBYTE ** ppucBuffer); + + RCODE FLMAPI f_decodeSEN( + const FLMBYTE ** ppucBuffer, + const FLMBYTE * pucEnd, + FLMUINT * puiValue); + + RCODE FLMAPI f_decodeSEN64( + const FLMBYTE ** ppucBuffer, + const FLMBYTE * pucEnd, + FLMUINT64 * pui64Value); + + RCODE FLMAPI f_readSEN( + IF_IStream * pIStream, + FLMUINT * puiValue, + FLMUINT * puiLength = NULL); + + RCODE FLMAPI f_readSEN64( + IF_IStream * pIStream, + FLMUINT64 * pui64Value, + FLMUINT * puiLength = NULL); + + FLMUINT FLMAPI f_languageToNum( + const char * pszLanguage); + + void FLMAPI f_languageToStr( + FLMINT iLangNum, + char * pszLanguage); + /**************************************************************************** Desc: ASCII character constants and macros ****************************************************************************/ @@ -2736,8 +2976,6 @@ { public: - virtual RCODE FLMAPI setup( void) = 0; - virtual FLMBOOL FLMAPI isPubidChar( FLMUNICODE uChar) = 0; @@ -2775,6 +3013,9 @@ FLMUNICODE * puzName, FLMBYTE * pszName) = 0; }; + + RCODE FLMAPI FlmGetXMLObject( + IF_XML ** ppXmlObject); /**************************************************************************** Desc: Name table @@ -3139,189 +3380,186 @@ const char * FLMAPI f_errorString( RCODE rc); - /**************************************************************************** - Desc: Key definitions - 
****************************************************************************/ - - #define FKB_ESCAPE 0xE01B /* Escape (ESC) */ - #define FKB_ESC FKB_ESCAPE - #define FKB_SPACE 0x20 - - #define FKB_HOME 0xE008 /* HOME key */ - #define FKB_UP 0xE017 /* Up arrow */ - #define FKB_PGUP 0xE059 /* Page Up */ - #define FKB_LEFT 0xE019 /* Left arrow */ - #define FKB_RIGHT 0xE018 /* Right arrow */ - #define FKB_END 0xE055 /* END key */ - #define FKB_DOWN 0xE01A /* Down arrow */ - #define FKB_PGDN 0xE05A /* Page Down */ - #define FKB_PLUS 0x002B /* Plus (+) */ - #define FKB_MINUS 0x002D /* Minus (-) */ - - #define FKB_INSERT 0xE05D /* Insert key */ - #define FKB_DELETE 0xE051 /* Delete key */ - #define FKB_BACKSPACE 0xE050 /* Backspace */ - #define FKB_TAB 0xE009 /* TAB */ - - #define FKB_ENTER 0xE00a /* Enter */ - #define FKB_F1 0xE020 /* F1 */ - #define FKB_F2 0xE021 /* F2 */ - #define FKB_F3 0xE022 /* F3 */ - #define FKB_F4 0xE023 /* F4 */ - #define FKB_F5 0xE024 /* F5 */ - #define FKB_F6 0xE025 /* F6 */ - #define FKB_F7 0xE026 /* F7 */ - #define FKB_F8 0xE027 /* F8 */ - #define FKB_F9 0xE028 /* F9 */ - #define FKB_F10 0xE029 /* F10 */ - #define FKB_F11 0xE03A /* F10 */ - #define FKB_F12 0xE03B /* F10 */ - - #define FKB_STAB 0xE05E /* Shift TAB */ - - #define FKB_SF1 0xE02C /* F1 */ - #define FKB_SF2 0xE02D /* F2 */ - #define FKB_SF3 0xE02E /* F3 */ - #define FKB_SF4 0xE02F /* F4 */ - #define FKB_SF5 0xE030 /* F5 */ - #define FKB_SF6 0xE031 /* F6 */ - #define FKB_SF7 0xE032 /* F7 */ - #define FKB_SF8 0xE033 /* F8 */ - #define FKB_SF9 0xE034 /* F9 */ - #define FKB_SF10 0xE035 /* F10 */ - #define FKB_SF11 0xE036 /* F10 */ - #define FKB_SF12 0xE037 /* F10 */ - - #define FKB_ALT_A 0xFDDC - #define FKB_ALT_B 0xFDDD - #define FKB_ALT_C 0xFDDE - #define FKB_ALT_D 0xFDDF - #define FKB_ALT_E 0xFDE0 - #define FKB_ALT_F 0xFDE1 - #define FKB_ALT_G 0xFDE2 - #define FKB_ALT_H 0xFDE3 - #define FKB_ALT_I 0xFDE4 - #define FKB_ALT_J 0xFDE5 - #define FKB_ALT_K 0xFDE6 - #define FKB_ALT_L 
0xFDE7 - #define FKB_ALT_M 0xFDE8 - #define FKB_ALT_N 0xFDE9 - #define FKB_ALT_O 0xFDEA - #define FKB_ALT_P 0xFDEB - #define FKB_ALT_Q 0xFDEC - #define FKB_ALT_R 0xFDED - #define FKB_ALT_S 0xFDEE - #define FKB_ALT_T 0xFDEF - #define FKB_ALT_U 0xFDF0 - #define FKB_ALT_V 0xFDF1 - #define FKB_ALT_W 0xFDF2 - #define FKB_ALT_X 0xFDF3 - #define FKB_ALT_Y 0xFDF4 - #define FKB_ALT_Z 0xFDF5 - - #define FKB_ALT_1 0xFDF7 /* ALT 1 */ - #define FKB_ALT_2 0xFDF8 /* ALT 2 */ - #define FKB_ALT_3 0xFDF9 /* ALT 3 */ - #define FKB_ALT_4 0xFDFA /* ALT 4 */ - #define FKB_ALT_5 0xFDFB /* ALT 5 */ - #define FKB_ALT_6 0xFDFC /* ALT 6 */ - #define FKB_ALT_7 0xFDFD /* ALT 7 */ - #define FKB_ALT_8 0xFDFE /* ALT 8 */ - #define FKB_ALT_9 0xFDFF /* ALT 9 */ - #define FKB_ALT_0 0xFDF6 /* ALT 0 */ - - #define FKB_ALT_MINUS 0xE061 /* ALT MINUS */ - #define FKB_ALT_EQUAL 0xE06B /* ALT EQUAL */ - - #define FKB_ALT_F1 0xE038 /* ALT F1 */ - #define FKB_ALT_F2 0xE039 /* ALT F2 */ - #define FKB_ALT_F3 0xE03A /* ALT F3 */ - #define FKB_ALT_F4 0xE03B /* ALT F4 */ - #define FKB_ALT_F5 0xE03C /* ALT F5 */ - #define FKB_ALT_F6 0xE03D /* ALT F6 */ - #define FKB_ALT_F7 0xE03E /* ALT F7 */ - #define FKB_ALT_F8 0xE03F /* ALT F8 */ - #define FKB_ALT_F9 0xE040 /* ALT F9 */ - #define FKB_ALT_F10 0xE041 /* ALT F10 -F11,F12 NOT SUPPORTED*/ - - #define FKB_GOTO 0xE058 /* GOTO cntl-home */ - #define FKB_CTRL_HOME 0xE058 /* CTRL Home */ - #define FKB_CTRL_UP 0xE063 /* CTRL Up arrow */ - #define FKB_CTRL_PGUP 0xE057 /* CTRL Page Up */ - - #define FKB_CTRL_LEFT 0xE054 /* CTRL Left arrow */ - #define FKB_CTRL_RIGHT 0xE053 /* CTRL Right arrow */ - - #define FKB_CTRL_END 0xE00B /* CTRL END */ - #define FKB_CTRL_DOWN 0xE064 /* CTRL Down arrow */ - #define FKB_CTRL_PGDN 0xE00C /* CTRL Page Down */ - #define FKB_CTRL_INSERT 0xE06E /* CTRL Insert */ - #define FKB_CTRL_DELETE 0xE06D /* CTRL Delete */ - - #define FKB_CTRL_ENTER 0xE05F /* CTRL Enter */ - - #define FKB_CTRL_A 0xE07C - #define FKB_CTRL_B 0xE07D - #define FKB_CTRL_C 
0xE07E - #define FKB_CTRL_D 0xE07F - #define FKB_CTRL_E 0xE080 - #define FKB_CTRL_F 0xE081 - #define FKB_CTRL_G 0xE082 - #define FKB_CTRL_H 0xE083 - #define FKB_CTRL_I 0xE084 - #define FKB_CTRL_J 0xE085 - #define FKB_CTRL_K 0xE086 - #define FKB_CTRL_L 0xE087 - #define FKB_CTRL_M 0xE088 - #define FKB_CTRL_N 0xE089 - #define FKB_CTRL_O 0xE08A - #define FKB_CTRL_P 0xE08B - #define FKB_CTRL_Q 0xE08C - #define FKB_CTRL_R 0xE08D - #define FKB_CTRL_S 0xE08E - #define FKB_CTRL_T 0xE08F - #define FKB_CTRL_U 0xE090 - #define FKB_CTRL_V 0xE091 - #define FKB_CTRL_W 0xE092 - #define FKB_CTRL_X 0xE093 - #define FKB_CTRL_Y 0xE094 - #define FKB_CTRL_Z 0xE095 - - #define FKB_CTRL_1 0xE06B /* F1 - NOT SUPPORTED IN WP TO F10*/ - #define FKB_CTRL_2 0xE06C /* F2 */ - #define FKB_CTRL_3 0xE06D /* F3 */ - #define FKB_CTRL_4 0xE06E /* F4 */ - #define FKB_CTRL_5 0xE06F /* F5 */ - #define FKB_CTRL_6 0xE070 /* F6 */ - #define FKB_CTRL_7 0xE071 /* F7 */ - #define FKB_CTRL_8 0xE072 /* F8 */ - #define FKB_CTRL_9 0xE073 /* F9 */ - #define FKB_CTRL_0 0xE074 /* F10 */ - - #define FKB_CTRL_MINUS 0xE060 /* MINUS */ - #define FKB_CTRL_EQUAL 0xE061 /* EQUAL - NOT SUPPORTED IN WP */ - - #define FKB_CTRL_F1 0xE038 /* F1 */ - #define FKB_CTRL_F2 0xE039 /* F2 */ - #define FKB_CTRL_F3 0xE03A /* F3 */ - #define FKB_CTRL_F4 0xE03B /* F4 */ - #define FKB_CTRL_F5 0xE03C /* F5 */ - #define FKB_CTRL_F6 0xE03D /* F6 */ - #define FKB_CTRL_F7 0xE03E /* F7 */ - #define FKB_CTRL_F8 0xE03F /* F8 */ - #define FKB_CTRL_F9 0xE040 /* F9 */ - #define FKB_CTRL_F10 0xE041 /* F10 */ - /**************************************************************************** Desc: FTX ****************************************************************************/ - #define FLM_CURSOR_BLOCK 0x01 - #define FLM_CURSOR_UNDERLINE 0x02 - #define FLM_CURSOR_INVISIBLE 0x04 - #define FLM_CURSOR_VISIBLE 0x08 + #define FKB_ESCAPE 0xE01B + #define FKB_ESC FKB_ESCAPE + #define FKB_SPACE 0x20 + + #define FKB_HOME 0xE008 + #define FKB_UP 0xE017 + #define 
FKB_PGUP 0xE059 + #define FKB_LEFT 0xE019 + #define FKB_RIGHT 0xE018 + #define FKB_END 0xE055 + #define FKB_DOWN 0xE01A + #define FKB_PGDN 0xE05A + #define FKB_PLUS 0x002B + #define FKB_MINUS 0x002D + + #define FKB_INSERT 0xE05D + #define FKB_DELETE 0xE051 + #define FKB_BACKSPACE 0xE050 + #define FKB_TAB 0xE009 + + #define FKB_ENTER 0xE00A + #define FKB_F1 0xE020 + #define FKB_F2 0xE021 + #define FKB_F3 0xE022 + #define FKB_F4 0xE023 + #define FKB_F5 0xE024 + #define FKB_F6 0xE025 + #define FKB_F7 0xE026 + #define FKB_F8 0xE027 + #define FKB_F9 0xE028 + #define FKB_F10 0xE029 + #define FKB_F11 0xE03A + #define FKB_F12 0xE03B + + #define FKB_STAB 0xE05E + + #define FKB_SF1 0xE02C + #define FKB_SF2 0xE02D + #define FKB_SF3 0xE02E + #define FKB_SF4 0xE02F + #define FKB_SF5 0xE030 + #define FKB_SF6 0xE031 + #define FKB_SF7 0xE032 + #define FKB_SF8 0xE033 + #define FKB_SF9 0xE034 + #define FKB_SF10 0xE035 + #define FKB_SF11 0xE036 + #define FKB_SF12 0xE037 + + #define FKB_ALT_A 0xFDDC + #define FKB_ALT_B 0xFDDD + #define FKB_ALT_C 0xFDDE + #define FKB_ALT_D 0xFDDF + #define FKB_ALT_E 0xFDE0 + #define FKB_ALT_F 0xFDE1 + #define FKB_ALT_G 0xFDE2 + #define FKB_ALT_H 0xFDE3 + #define FKB_ALT_I 0xFDE4 + #define FKB_ALT_J 0xFDE5 + #define FKB_ALT_K 0xFDE6 + #define FKB_ALT_L 0xFDE7 + #define FKB_ALT_M 0xFDE8 + #define FKB_ALT_N 0xFDE9 + #define FKB_ALT_O 0xFDEA + #define FKB_ALT_P 0xFDEB + #define FKB_ALT_Q 0xFDEC + #define FKB_ALT_R 0xFDED + #define FKB_ALT_S 0xFDEE + #define FKB_ALT_T 0xFDEF + #define FKB_ALT_U 0xFDF0 + #define FKB_ALT_V 0xFDF1 + #define FKB_ALT_W 0xFDF2 + #define FKB_ALT_X 0xFDF3 + #define FKB_ALT_Y 0xFDF4 + #define FKB_ALT_Z 0xFDF5 + + #define FKB_ALT_1 0xFDF7 + #define FKB_ALT_2 0xFDF8 + #define FKB_ALT_3 0xFDF9 + #define FKB_ALT_4 0xFDFA + #define FKB_ALT_5 0xFDFB + #define FKB_ALT_6 0xFDFC + #define FKB_ALT_7 0xFDFD + #define FKB_ALT_8 0xFDFE + #define FKB_ALT_9 0xFDFF + #define FKB_ALT_0 0xFDF6 + + #define FKB_ALT_MINUS 0xE061 + #define FKB_ALT_EQUAL 
0xE06B + + #define FKB_ALT_F1 0xE038 + #define FKB_ALT_F2 0xE039 + #define FKB_ALT_F3 0xE03A + #define FKB_ALT_F4 0xE03B + #define FKB_ALT_F5 0xE03C + #define FKB_ALT_F6 0xE03D + #define FKB_ALT_F7 0xE03E + #define FKB_ALT_F8 0xE03F + #define FKB_ALT_F9 0xE040 + #define FKB_ALT_F10 0xE041 + + #define FKB_GOTO 0xE058 + #define FKB_CTRL_HOME 0xE058 + #define FKB_CTRL_UP 0xE063 + #define FKB_CTRL_PGUP 0xE057 + + #define FKB_CTRL_LEFT 0xE054 + #define FKB_CTRL_RIGHT 0xE053 + + #define FKB_CTRL_END 0xE00B + #define FKB_CTRL_DOWN 0xE064 + #define FKB_CTRL_PGDN 0xE00C + #define FKB_CTRL_INSERT 0xE06E + #define FKB_CTRL_DELETE 0xE06D + + #define FKB_CTRL_ENTER 0xE05F + + #define FKB_CTRL_A 0xE07C + #define FKB_CTRL_B 0xE07D + #define FKB_CTRL_C 0xE07E + #define FKB_CTRL_D 0xE07F + #define FKB_CTRL_E 0xE080 + #define FKB_CTRL_F 0xE081 + #define FKB_CTRL_G 0xE082 + #define FKB_CTRL_H 0xE083 + #define FKB_CTRL_I 0xE084 + #define FKB_CTRL_J 0xE085 + #define FKB_CTRL_K 0xE086 + #define FKB_CTRL_L 0xE087 + #define FKB_CTRL_M 0xE088 + #define FKB_CTRL_N 0xE089 + #define FKB_CTRL_O 0xE08A + #define FKB_CTRL_P 0xE08B + #define FKB_CTRL_Q 0xE08C + #define FKB_CTRL_R 0xE08D + #define FKB_CTRL_S 0xE08E + #define FKB_CTRL_T 0xE08F + #define FKB_CTRL_U 0xE090 + #define FKB_CTRL_V 0xE091 + #define FKB_CTRL_W 0xE092 + #define FKB_CTRL_X 0xE093 + #define FKB_CTRL_Y 0xE094 + #define FKB_CTRL_Z 0xE095 + + #define FKB_CTRL_1 0xE06B + #define FKB_CTRL_2 0xE06C + #define FKB_CTRL_3 0xE06D + #define FKB_CTRL_4 0xE06E + #define FKB_CTRL_5 0xE06F + #define FKB_CTRL_6 0xE070 + #define FKB_CTRL_7 0xE071 + #define FKB_CTRL_8 0xE072 + #define FKB_CTRL_9 0xE073 + #define FKB_CTRL_0 0xE074 + + #define FKB_CTRL_MINUS 0xE060 + #define FKB_CTRL_EQUAL 0xE061 + + #define FKB_CTRL_F1 0xE038 + #define FKB_CTRL_F2 0xE039 + #define FKB_CTRL_F3 0xE03A + #define FKB_CTRL_F4 0xE03B + #define FKB_CTRL_F5 0xE03C + #define FKB_CTRL_F6 0xE03D + #define FKB_CTRL_F7 0xE03E + #define FKB_CTRL_F8 0xE03F + #define 
FKB_CTRL_F9 0xE040 + #define FKB_CTRL_F10 0xE041 + + #define FLM_CURSOR_BLOCK 0x01 + #define FLM_CURSOR_UNDERLINE 0x02 + #define FLM_CURSOR_INVISIBLE 0x04 + #define FLM_CURSOR_VISIBLE 0x08 typedef struct FTX_SCREEN FTX_SCREEN; + typedef struct FTX_WINDOW FTX_WINDOW; typedef FLMBOOL (FLMAPI * KEY_HANDLER)( diff --git a/ftk/src/ftkmisc.cpp b/ftk/src/ftkmisc.cpp index 8ecf987..bba1cf0 100644 --- a/ftk/src/ftkmisc.cpp +++ b/ftk/src/ftkmisc.cpp @@ -33,6 +33,7 @@ static FLMUINT32 * gv_pui32CRCTbl = NULL; static IF_ThreadMgr * gv_pThreadMgr = NULL; static IF_FileSystem * gv_pFileSystem = NULL; static FLMUINT gv_uiMaxFileSize = FLM_MAXIMUM_FILE_SIZE; +static F_XML * gv_pXml = NULL; FSTATIC RCODE f_initSerialNumberGenerator( void); @@ -50,6 +51,46 @@ FSTATIC RCODE f_initCRCTable( #endif #endif +/**************************************************************************** +Desc: +****************************************************************************/ +static FLMBYTE gv_ucSENLengthArray[] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 15 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 16 - 31 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48 - 63 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 - 79 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80 - 95 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96 - 111 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 112 - 127 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 128 - 143 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 144 - 159 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 160 - 175 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 176 - 191 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 192 - 207 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 208 - 223 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 224 - 239 + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9 // 240 - 255 
+}; + +/**************************************************************************** +Desc: First-byte prefix bits of a SEN, indexed by the SEN's total length in bytes (entries 0 and 1 are zero) +****************************************************************************/ +static FLMBYTE ucSENPrefixArray[] = +{ + 0, + 0, + 0x80, + 0xC0, + 0xE0, + 0xF0, + 0xF8, + 0xFC, + 0xFE, + 0xFF +}; + /**************************************************************************** Desc: ****************************************************************************/ @@ -64,6 +105,11 @@ RCODE FLMAPI ftkStartup( void) f_memoryInit(); + if( RC_BAD( rc = f_initCharMappingTables())) + { + goto Exit; + } + if( RC_BAD( rc = f_allocFileSystem( &gv_pFileSystem))) { goto Exit; @@ -84,11 +130,29 @@ RCODE FLMAPI ftkStartup( void) goto Exit; } + if( (gv_pXml = f_new F_XML) == NULL) + { + rc = RC_SET( NE_FLM_MEM); + goto Exit; + } + + if( RC_BAD( rc = gv_pXml->setup())) + { + goto Exit; + } + if( RC_BAD( rc = f_checkErrorCodeTables())) { goto Exit; } +#ifdef FLM_DEBUG + if( RC_BAD( rc = f_verifyMetaphoneRoutines())) + { + goto Exit; + } +#endif + #if defined( FLM_LINUX) f_setupLinuxKernelVersion(); gv_uiMaxFileSize = f_getLinuxMaxFileSize(); @@ -144,7 +208,14 @@ void FLMAPI ftkShutdown( void) f_free( &gv_pui32CRCTbl); } + if( gv_pXml) + { + gv_pXml->Release(); + gv_pXml = NULL; + } + f_freeSerialNumberGenerator(); + f_freeCharMappingTables(); f_memoryCleanup(); } @@ -1020,3 +1091,404 @@ FLMUINT64 FLMAPI f_getMaxFileSize( void) { return( gv_uiMaxFileSize); } + +/***************************************************************************** +Desc: Reads a SEN from a stream into a FLMUINT; returns NE_FLM_CONV_DEST_OVERFLOW if the decoded value does not fit +******************************************************************************/ +RCODE FLMAPI f_readSEN( + IF_IStream * pIStream, + FLMUINT * puiValue, + FLMUINT * puiLength) +{ + RCODE rc; + FLMUINT64 ui64Tmp; + + if( RC_BAD( rc = f_readSEN64( pIStream, &ui64Tmp, puiLength))) + { + goto Exit; + } + + if( ui64Tmp > ~((FLMUINT)0)) + { + rc = RC_SET_AND_ASSERT( NE_FLM_CONV_DEST_OVERFLOW); + goto Exit; + } + + if( puiValue) + { + *puiValue = (FLMUINT)ui64Tmp; + } + +Exit: + + 
return( rc); +} + +/***************************************************************************** +Desc: Reads a SEN from a stream as a 64-bit value; puiLength, if given, receives the SEN's total byte count. When pui64Value is NULL the trailing bytes are read with a NULL destination (presumably the stream treats that as a skip; confirm) +******************************************************************************/ +RCODE FLMAPI f_readSEN64( + IF_IStream * pIStream, + FLMUINT64 * pui64Value, + FLMUINT * puiLength) +{ + RCODE rc = NE_FLM_OK; + FLMUINT uiLen; + FLMUINT uiSENLength; + FLMBYTE ucBuffer[ 16]; + const FLMBYTE * pucBuffer; + + uiLen = 1; + if( RC_BAD( rc = pIStream->read( + (char *)&ucBuffer[ 0], uiLen, &uiLen))) + { + goto Exit; + } + + uiSENLength = gv_ucSENLengthArray[ ucBuffer[ 0]]; + uiLen = uiSENLength - 1; + + if( puiLength) + { + *puiLength = uiSENLength; + } + + if( pui64Value) + { + pucBuffer = &ucBuffer[ 1]; + } + else + { + pucBuffer = NULL; + } + + if( uiLen) + { + if( RC_BAD( rc = pIStream->read( + (char *)pucBuffer, uiLen, &uiLen))) + { + goto Exit; + } + } + + if( pui64Value) + { + pucBuffer = &ucBuffer[ 0]; + if( RC_BAD( rc = f_decodeSEN64( &pucBuffer, + &ucBuffer[ sizeof( ucBuffer)], pui64Value))) + { + goto Exit; + } + } + +Exit: + + return( rc); +} + +/***************************************************************************** +Desc: Returns the total byte length of a SEN given its first byte +******************************************************************************/ +FLMUINT FLMAPI f_getSENLength( + FLMBYTE ucByte) +{ + return( gv_ucSENLengthArray[ ucByte]); +} + +/**************************************************************************** +Desc: Decodes a SEN from memory into a 64-bit value. Returns NE_FLM_BAD_SEN if the SEN would extend past pucEnd; an empty buffer is rejected without reading the length byte. The caller's buffer pointer is advanced on every return path +****************************************************************************/ +RCODE FLMAPI f_decodeSEN64( + const FLMBYTE ** ppucBuffer, + const FLMBYTE * pucEnd, + FLMUINT64 * pui64Value) +{ + RCODE rc = NE_FLM_OK; + FLMUINT uiSENLength; + const FLMBYTE * pucBuffer = *ppucBuffer; + + uiSENLength = (pucBuffer < pucEnd) ? gv_ucSENLengthArray[ *pucBuffer] : 1; + if( pucBuffer + uiSENLength > pucEnd) + { + if (pui64Value) + { + *pui64Value = 0; + } + rc = RC_SET( NE_FLM_BAD_SEN); + goto Exit; + } + + if (pui64Value) + { + switch( uiSENLength) + { + case 1: + *pui64Value = *pucBuffer; + break; + + 
case 2: + *pui64Value = (((FLMUINT64)(*pucBuffer & 0x3F)) << 8) + pucBuffer[ 1]; + break; + + case 3: + *pui64Value = (((FLMUINT64)(*pucBuffer & 0x1F)) << 16) + + (((FLMUINT64)pucBuffer[ 1]) << 8) + pucBuffer[ 2]; + break; + + case 4: + *pui64Value = (((FLMUINT64)(*pucBuffer & 0x0F)) << 24) + + (((FLMUINT64)pucBuffer[ 1]) << 16) + + (((FLMUINT64)pucBuffer[ 2]) << 8) + pucBuffer[ 3]; + break; + + case 5: + *pui64Value = (((FLMUINT64)(*pucBuffer & 0x07)) << 32) + + (((FLMUINT64)pucBuffer[ 1]) << 24) + + (((FLMUINT64)pucBuffer[ 2]) << 16) + + (((FLMUINT64)pucBuffer[ 3]) << 8) + pucBuffer[ 4]; + break; + + case 6: + *pui64Value = (((FLMUINT64)(*pucBuffer & 0x03)) << 40) + + (((FLMUINT64)pucBuffer[ 1]) << 32) + + (((FLMUINT64)pucBuffer[ 2]) << 24) + + (((FLMUINT64)pucBuffer[ 3]) << 16) + + (((FLMUINT64)pucBuffer[ 4]) << 8) + pucBuffer[ 5]; + break; + + case 7: + *pui64Value = (((FLMUINT64)(*pucBuffer & 0x01)) << 48) + + (((FLMUINT64)pucBuffer[ 1]) << 40) + + (((FLMUINT64)pucBuffer[ 2]) << 32) + + (((FLMUINT64)pucBuffer[ 3]) << 24) + + (((FLMUINT64)pucBuffer[ 4]) << 16) + + (((FLMUINT64)pucBuffer[ 5]) << 8) + pucBuffer[ 6]; + break; + + case 8: + *pui64Value = (((FLMUINT64)pucBuffer[ 1]) << 48) + + (((FLMUINT64)pucBuffer[ 2]) << 40) + + (((FLMUINT64)pucBuffer[ 3]) << 32) + + (((FLMUINT64)pucBuffer[ 4]) << 24) + + (((FLMUINT64)pucBuffer[ 5]) << 16) + + (((FLMUINT64)pucBuffer[ 6]) << 8) + pucBuffer[ 7]; + break; + + case 9: + *pui64Value = (((FLMUINT64)pucBuffer[ 1]) << 56) + + (((FLMUINT64)pucBuffer[ 2]) << 48) + + (((FLMUINT64)pucBuffer[ 3]) << 40) + + (((FLMUINT64)pucBuffer[ 4]) << 32) + + (((FLMUINT64)pucBuffer[ 5]) << 24) + + (((FLMUINT64)pucBuffer[ 6]) << 16) + + (((FLMUINT64)pucBuffer[ 7]) << 8) + pucBuffer[ 8]; + break; + + default: + *pui64Value = 0; + flmAssert( 0); + break; + } + } + +Exit: + + *ppucBuffer = pucBuffer + uiSENLength; + + return( rc); +} + +/**************************************************************************** +Desc: Decodes a SEN from memory into a FLMUINT; returns NE_FLM_CONV_NUM_OVERFLOW if the value exceeds FLM_MAX_UINT
+****************************************************************************/ +RCODE FLMAPI f_decodeSEN( + const FLMBYTE ** ppucBuffer, + const FLMBYTE * pucEnd, + FLMUINT * puiValue) +{ + RCODE rc = NE_FLM_OK; + FLMUINT64 ui64Value; + + if( RC_BAD( rc = f_decodeSEN64( ppucBuffer, pucEnd, &ui64Value))) + { + return( rc); + } + + if( ui64Value > FLM_MAX_UINT) + { + return( RC_SET_AND_ASSERT( NE_FLM_CONV_NUM_OVERFLOW)); + } + + if( puiValue) + { + *puiValue = (FLMUINT)ui64Value; + } + + return( rc); +} + +/**************************************************************************** +Desc: Returns the low byte of ui64Num shifted right by ucBits, or 0 when ucBits is 64 or more +****************************************************************************/ +FINLINE FLMBYTE f_shiftRightRetByte( + FLMUINT64 ui64Num, + FLMBYTE ucBits) +{ + return( ucBits < 64 ? (FLMBYTE)(ui64Num >> ucBits) : 0); +} + +/**************************************************************************** +Desc: Returns the number of bytes (1 to FLM_MAX_SEN_LEN) needed to encode ui64Num as a SEN +****************************************************************************/ +FLMUINT FLMAPI f_getSENByteCount( + FLMUINT64 ui64Num) +{ + FLMUINT uiCount = 0; + + if( ui64Num < 0x80) + { + return( 1); + } + + while( ui64Num) + { + uiCount++; + ui64Num >>= 7; + } + + // If the high bit is set, the counter will be incremented 1 beyond + // the actual number of bytes needed to represent the SEN. We will need + // to re-visit this if we ever go beyond 64-bits. + + return( uiCount < FLM_MAX_SEN_LEN ? uiCount : FLM_MAX_SEN_LEN); +} + +/**************************************************************************** +Desc: Encodes a number as a SEN, optionally padded to uiSizeWanted bytes (0 means the minimum length) +****************************************************************************/ +FLMUINT FLMAPI f_encodeSEN( + FLMUINT64 ui64Value, + FLMBYTE ** ppucBuffer, + FLMUINT uiSizeWanted) +{ + FLMBYTE * pucBuffer = *ppucBuffer; + FLMUINT uiSenLen = f_getSENByteCount( ui64Value); + + flmAssert( uiSizeWanted <= FLM_MAX_SEN_LEN && + (!uiSizeWanted || uiSizeWanted >= uiSenLen)); + + uiSenLen = uiSizeWanted > uiSenLen ? 
uiSizeWanted : uiSenLen; + + if( uiSenLen == 1) + { + *pucBuffer++ = (FLMBYTE)ui64Value; + } + else + { + FLMUINT uiTmp = (uiSenLen - 1) << 3; + + *pucBuffer++ = ucSENPrefixArray[ uiSenLen] + + f_shiftRightRetByte( ui64Value, (FLMBYTE)uiTmp); + while( uiTmp) + { + uiTmp -= 8; + *pucBuffer++ = f_shiftRightRetByte( ui64Value, (FLMBYTE)uiTmp); + } + } + + *ppucBuffer = pucBuffer; + return( uiSenLen); +} + +/**************************************************************************** +Desc: Encodes a number as a minimum-length SEN; returns NE_FLM_CONV_DEST_OVERFLOW, leaving the caller's pointer unchanged, if it would run past pucEnd +****************************************************************************/ +RCODE FLMAPI f_encodeSEN( + FLMUINT64 ui64Value, + FLMBYTE ** ppucBuffer, + FLMBYTE * pucEnd) +{ + RCODE rc = NE_FLM_OK; + FLMBYTE * pucBuffer = *ppucBuffer; + FLMUINT uiSenLen = f_getSENByteCount( ui64Value); + + if( *ppucBuffer + uiSenLen > pucEnd) + { + rc = RC_SET_AND_ASSERT( NE_FLM_CONV_DEST_OVERFLOW); + goto Exit; + } + + if( uiSenLen == 1) + { + *pucBuffer++ = (FLMBYTE)ui64Value; + } + else + { + FLMUINT uiTmp = (uiSenLen - 1) << 3; + + *pucBuffer++ = ucSENPrefixArray[ uiSenLen] + + f_shiftRightRetByte( ui64Value, (FLMBYTE)uiTmp); + while( uiTmp) + { + uiTmp -= 8; + *pucBuffer++ = f_shiftRightRetByte( ui64Value, (FLMBYTE)uiTmp); + } + } + + *ppucBuffer = pucBuffer; + +Exit: + + return( rc); +} + +/**************************************************************************** +Desc: Encodes a number as a SEN of exactly uiSenLen bytes. NOTE(review): uiSenLen is not range-checked here; presumably callers pass 1 to FLM_MAX_SEN_LEN and a length large enough for the value (confirm) +****************************************************************************/ +FLMUINT FLMAPI f_encodeSENKnownLength( + FLMUINT64 ui64Value, + FLMUINT uiSenLen, + FLMBYTE ** ppucBuffer) +{ + FLMBYTE * pucBuffer = *ppucBuffer; + + if( uiSenLen == 1) + { + *pucBuffer++ = (FLMBYTE)ui64Value; + } + else + { + FLMUINT uiTmp = (uiSenLen - 1) << 3; + + *pucBuffer++ = ucSENPrefixArray[ uiSenLen] + + f_shiftRightRetByte( ui64Value, (FLMBYTE)uiTmp); + while( uiTmp) + { + uiTmp -= 8; + *pucBuffer++ = f_shiftRightRetByte( ui64Value, (FLMBYTE)uiTmp); + } + } + + *ppucBuffer = 
pucBuffer; + return( uiSenLen); +} + +/**************************************************************************** +Desc: Returns the toolkit's shared XML object with a reference added for the caller. NOTE(review): gv_pXml is dereferenced without a NULL check; presumably only called between ftkStartup and ftkShutdown (confirm) +****************************************************************************/ +RCODE FLMAPI FlmGetXMLObject( + IF_XML ** ppXmlObject) +{ + *ppXmlObject = gv_pXml; + (*ppXmlObject)->AddRef(); + + return( NE_FLM_OK); +} + +/**************************************************************************** +Desc: Returns the shared XML object pointer without adding a reference +****************************************************************************/ +IF_XML * f_getXmlObjPtr( void) +{ + return( gv_pXml); +} diff --git a/ftk/src/ftksys.h b/ftk/src/ftksys.h index b080796..1a88cc6 100644 --- a/ftk/src/ftksys.h +++ b/ftk/src/ftksys.h @@ -1901,6 +1901,243 @@ friend class F_FileSystem; }; + /**************************************************************************** + Desc: Collating input stream layered over an IF_PosIStream, on which it holds a reference from open() until close() or destruction. NOTE(review): the constructor leaves m_uiCompareRules, m_bCaseSensitive and m_ui64EndOfLeadingSpacesPos unset; they are first assigned in open() + ****************************************************************************/ + class F_CollIStream : public IF_CollIStream + { + public: + + F_CollIStream() + { + m_pIStream = NULL; + m_uiLanguage = 0; + m_bMayHaveWildCards = FALSE; + m_bUnicodeStream = FALSE; + m_uNextChar = 0; + } + + virtual ~F_CollIStream() + { + if( m_pIStream) + { + m_pIStream->Release(); + } + } + + RCODE FLMAPI open( + IF_PosIStream * pIStream, + FLMBOOL bUnicodeStream, + FLMUINT uiLanguage, + FLMUINT uiCompareRules, + FLMBOOL bMayHaveWildCards) + { + if( m_pIStream) + { + m_pIStream->Release(); + } + + m_pIStream = pIStream; + m_pIStream->AddRef(); + m_uiLanguage = uiLanguage; + m_uiCompareRules = uiCompareRules; + m_bCaseSensitive = (uiCompareRules & FLM_COMP_CASE_INSENSITIVE) + ? 
FALSE + : TRUE; + m_bMayHaveWildCards = bMayHaveWildCards; + m_bUnicodeStream = bUnicodeStream; + m_ui64EndOfLeadingSpacesPos = 0; + return( NE_FLM_OK); + } + + RCODE FLMAPI close( void) + { + if( m_pIStream) + { + m_pIStream->Release(); + m_pIStream = NULL; + } + + return( NE_FLM_OK); + } + + RCODE FLMAPI read( + void * pvBuffer, + FLMUINT uiBytesToRead, + FLMUINT * puiBytesRead) + { + RCODE rc = NE_FLM_OK; + + if( RC_BAD( rc = m_pIStream->read( pvBuffer, + uiBytesToRead, puiBytesRead))) + { + goto Exit; + } + + Exit: + + return( rc); + } + + RCODE FLMAPI read( + FLMBOOL bAllowTwoIntoOne, + FLMUNICODE * puChar, + FLMBOOL * pbCharIsWild, + FLMUINT16 * pui16Col, + FLMUINT16 * pui16SubCol, + FLMBYTE * pucCase); + + FINLINE FLMUINT64 FLMAPI totalSize( void) + { + if( m_pIStream) + { + return( m_pIStream->totalSize()); + } + + return( 0); + } + + FINLINE FLMUINT64 FLMAPI remainingSize( void) + { + if( m_pIStream) + { + return( m_pIStream->remainingSize()); + } + + return( 0); + } + + FINLINE RCODE FLMAPI positionTo( + FLMUINT64) + { + return( RC_SET_AND_ASSERT( NE_FLM_NOT_IMPLEMENTED)); + } + + FINLINE RCODE FLMAPI positionTo( + F_CollStreamPos * pPos) + { + + // Should never be able to position back to before the + // leading spaces. 
+ + m_uNextChar = pPos->uNextChar; + flmAssert( pPos->ui64Position >= m_ui64EndOfLeadingSpacesPos); + return( m_pIStream->positionTo( pPos->ui64Position)); + } + + FINLINE FLMUINT64 FLMAPI getCurrPosition( void) + { + flmAssert( 0); + return( 0); + } + + void FLMAPI getCurrPosition( + F_CollStreamPos * pPos); + + private: + + FINLINE RCODE readCharFromStream( + FLMUNICODE * puChar) + { + RCODE rc = NE_FLM_OK; + + if( m_bUnicodeStream) + { + if( RC_BAD( rc = m_pIStream->read( puChar, sizeof( FLMUNICODE), NULL))) + { + goto Exit; + } + } + else + { + if( RC_BAD( rc = f_readUTF8CharAsUnicode( + m_pIStream, puChar))) + { + goto Exit; + } + } + + Exit: + + return( rc); + } + + IF_PosIStream * m_pIStream; + FLMUINT m_uiLanguage; + FLMBOOL m_bCaseSensitive; + FLMUINT m_uiCompareRules; + FLMUINT64 m_ui64EndOfLeadingSpacesPos; + FLMBOOL m_bMayHaveWildCards; + FLMBOOL m_bUnicodeStream; + FLMUNICODE m_uNextChar; + }; + + /**************************************************************************** + Desc: XML + ****************************************************************************/ + + typedef struct xmlChar + { + FLMBYTE ucFlags; + } XMLCHAR; + + class F_XML : public IF_XML + { + public: + + F_XML(); + + virtual ~F_XML(); + + RCODE FLMAPI setup( void); + + FLMBOOL FLMAPI isPubidChar( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isQuoteChar( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isWhitespace( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isExtender( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isCombiningChar( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isNameChar( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isNCNameChar( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isIdeographic( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isBaseChar( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isDigit( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isLetter( + FLMUNICODE uChar); + + FLMBOOL FLMAPI isNameValid( + FLMUNICODE * puzName, + FLMBYTE * pszName); + + private: + + void setCharFlag( + FLMUNICODE uLowChar, + FLMUNICODE 
uHighChar, + FLMUINT16 ui16Flag); + + XMLCHAR * m_pCharTable; + }; + /**************************************************************************** Desc: Logging ****************************************************************************/ @@ -1953,4 +2190,12 @@ IF_ThreadMgr * f_getThreadMgrPtr( void); + RCODE f_verifyMetaphoneRoutines( void); + + RCODE f_initCharMappingTables( void); + + void f_freeCharMappingTables( void); + + IF_XML * f_getXmlObjPtr( void); + #endif // FTKSYS_H diff --git a/ftk/src/ftktext.cpp b/ftk/src/ftktext.cpp index 2e9472c..9d10519 100644 --- a/ftk/src/ftktext.cpp +++ b/ftk/src/ftktext.cpp @@ -2199,40 +2199,10 @@ const unsigned char UnicodeProperties[ 32768] = /**************************************************************************** Desc: ****************************************************************************/ -FINLINE FLMBOOL f_isWhitespace( +FLMBOOL FLMAPI f_isWhitespace( FLMUNICODE uzChar) { - return( (uzChar == ' ' || uzChar == '\t' || - uzChar == '\n' || uzChar == '\r') ? TRUE : FALSE); -} - -/**************************************************************************** -Desc: -****************************************************************************/ -FINLINE FLMUNICODE f_convertChar( - FLMUNICODE uzChar, - FLMUINT uiCompareRules) -{ - if (uzChar == ASCII_SPACE || - (uzChar == ASCII_UNDERSCORE && - (uiCompareRules & FLM_COMP_NO_UNDERSCORES)) || - (f_isWhitespace( uzChar) && - (uiCompareRules & FLM_COMP_WHITESPACE_AS_SPACE))) - { - return( (FLMUNICODE)((uiCompareRules & - (FLM_COMP_NO_WHITESPACE | - FLM_COMP_IGNORE_LEADING_SPACE)) - ? 
(FLMUNICODE)0 - : (FLMUNICODE)ASCII_SPACE)); - } - else if (uzChar == ASCII_DASH && (uiCompareRules & FLM_COMP_NO_DASHES)) - { - return( (FLMUNICODE)0); - } - else - { - return( uzChar); - } + return( f_getXmlObjPtr()->isWhitespace( uzChar)); } /**************************************************************************** @@ -7287,3 +7257,334 @@ Exit: return( rc); } #endif + +/***************************************************************************** +Desc: +Notes: pucBuf must be able to contain at least 3 bytes +******************************************************************************/ +RCODE FLMAPI f_readUTF8CharAsUTF8( + IF_IStream * pIStream, + FLMBYTE * pucBuf, + FLMUINT * puiLen) +{ + RCODE rc = NE_FLM_OK; + FLMUINT uiLen; + + if( *puiLen == 0) + { + rc = RC_SET_AND_ASSERT( NE_FLM_CONV_DEST_OVERFLOW); + goto Exit; + } + + uiLen = 1; + if( RC_BAD( rc = pIStream->read( pucBuf, uiLen, &uiLen))) + { + goto Exit; + } + + if( pucBuf[ 0] <= 0x7F) + { + if( !pucBuf[ 0]) + { + rc = RC_SET( NE_FLM_EOF_HIT); + goto Exit; + } + + *puiLen = 1; + goto Exit; + } + + if( *puiLen < 2) + { + rc = RC_SET_AND_ASSERT( NE_FLM_CONV_DEST_OVERFLOW); + goto Exit; + } + + uiLen = 1; + if( RC_BAD( rc = pIStream->read( &pucBuf[ 1], uiLen, &uiLen))) + { + if( rc == NE_FLM_EOF_HIT) + { + rc = RC_SET_AND_ASSERT( NE_FLM_BAD_UTF8); + } + goto Exit; + } + + if( (pucBuf[ 1] >> 6) != 0x02) + { + rc = RC_SET_AND_ASSERT( NE_FLM_BAD_UTF8); + goto Exit; + } + + if( (pucBuf[ 0] >> 5) == 0x06) + { + *puiLen = 2; + goto Exit; + } + + if( *puiLen < 3) + { + rc = RC_SET_AND_ASSERT( NE_FLM_CONV_DEST_OVERFLOW); + goto Exit; + } + + uiLen = 1; + if( RC_BAD( rc = pIStream->read( &pucBuf[ 2], uiLen, &uiLen))) + { + if( rc == NE_FLM_EOF_HIT) + { + rc = RC_SET_AND_ASSERT( NE_FLM_BAD_UTF8); + } + goto Exit; + } + + if( (pucBuf[ 0] >> 4) != 0x0E || (pucBuf[ 2] >> 6) != 0x02) + { + rc = RC_SET_AND_ASSERT( NE_FLM_BAD_UTF8); + goto Exit; + } + + *puiLen = 3; + +Exit: + + return( rc); +} + 
+/*************************************************************************** +Desc: Compares two in-memory Unicode strings (lengths given in bytes) by wrapping each in a buffer stream and a collation stream and calling f_compareCollStreams; the comparison result is returned in *piResult +****************************************************************************/ +RCODE FLMAPI f_compareUnicodeStrings( + const FLMUNICODE * puzLString, + FLMUINT uiLStrBytes, + FLMBOOL bLeftWild, + const FLMUNICODE * puzRString, + FLMUINT uiRStrBytes, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult) +{ + RCODE rc = NE_FLM_OK; + F_BufferIStream bufferLStream; + F_BufferIStream bufferRStream; + F_CollIStream lStream; + F_CollIStream rStream; + + if( RC_BAD( rc = bufferLStream.open( + (const char *)puzLString, uiLStrBytes))) + { + goto Exit; + } + + if( RC_BAD( rc = bufferRStream.open( + (const char *)puzRString, uiRStrBytes))) + { + goto Exit; + } + + if( RC_BAD( rc = lStream.open( &bufferLStream, TRUE, uiLanguage, + uiCompareRules, bLeftWild))) + { + goto Exit; + } + + if( RC_BAD( rc = rStream.open( &bufferRStream, TRUE, uiLanguage, + uiCompareRules, bRightWild))) + { + goto Exit; + } + + if( RC_BAD( rc = f_compareCollStreams( + (IF_CollIStream *)&lStream, (IF_CollIStream *)&rStream, + (bLeftWild || bRightWild) ? 
TRUE : FALSE, uiLanguage, piResult))) + { + goto Exit; + } + +Exit: + + return( rc); +} + +/*************************************************************************** +Desc: Compares two in-memory UTF-8 strings (lengths given in bytes) by wrapping each in a buffer stream and a collation stream and calling f_compareCollStreams; the comparison result is returned in *piResult +****************************************************************************/ +RCODE FLMAPI f_compareUTF8Strings( + const FLMBYTE * pucLString, + FLMUINT uiLStrBytes, + FLMBOOL bLeftWild, + const FLMBYTE * pucRString, + FLMUINT uiRStrBytes, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult) +{ + RCODE rc = NE_FLM_OK; + F_BufferIStream bufferLStream; + F_BufferIStream bufferRStream; + F_CollIStream lStream; + F_CollIStream rStream; + + if (RC_BAD( rc = bufferLStream.open( + (const char *)pucLString, uiLStrBytes))) + { + goto Exit; + } + + if( RC_BAD( rc = bufferRStream.open( + (const char *)pucRString, uiRStrBytes))) + { + goto Exit; + } + + if( RC_BAD( rc = lStream.open( &bufferLStream, FALSE, uiLanguage, + uiCompareRules, bLeftWild))) + { + goto Exit; + } + + if( RC_BAD( rc = rStream.open( &bufferRStream, FALSE, uiLanguage, + uiCompareRules, bRightWild))) + { + goto Exit; + } + + if( RC_BAD( rc = f_compareCollStreams( + (IF_CollIStream *)&lStream, (IF_CollIStream *)&rStream, + (bLeftWild || bRightWild) ? 
TRUE : FALSE, + uiLanguage, piResult))) + { + goto Exit; + } + +Exit: + + return( rc); +} + +/*************************************************************************** +Desc: Compares two UTF-8 positionable streams by wrapping each in a collation stream and calling f_compareCollStreams; the comparison result is returned in *piResult +****************************************************************************/ +RCODE FLMAPI f_compareUTF8Streams( + IF_PosIStream * pLStream, + FLMBOOL bLeftWild, + IF_PosIStream * pRStream, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult) +{ + RCODE rc = NE_FLM_OK; + F_CollIStream lStream; + F_CollIStream rStream; + + if( RC_BAD( rc = lStream.open( pLStream, FALSE, uiLanguage, + uiCompareRules, bLeftWild))) + { + goto Exit; + } + + if( RC_BAD( rc = rStream.open( pRStream, FALSE, uiLanguage, + uiCompareRules, bRightWild))) + { + goto Exit; + } + + if( RC_BAD( rc = f_compareCollStreams( + &lStream, &rStream, (bLeftWild || bRightWild) ? TRUE : FALSE, + uiLanguage, piResult))) + { + goto Exit; + } + +Exit: + + return( rc); +} + +/*************************************************************************** +Desc: Compares two Unicode positionable streams by wrapping each in a collation stream and calling f_compareCollStreams; the comparison result is returned in *piResult +****************************************************************************/ +RCODE FLMAPI f_compareUnicodeStreams( + IF_PosIStream * pLStream, + FLMBOOL bLeftWild, + IF_PosIStream * pRStream, + FLMBOOL bRightWild, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMINT * piResult) +{ + RCODE rc = NE_FLM_OK; + F_CollIStream lStream; + F_CollIStream rStream; + + if( RC_BAD( rc = lStream.open( pLStream, TRUE, uiLanguage, + uiCompareRules, bLeftWild))) + { + goto Exit; + } + + if( RC_BAD( rc = rStream.open( pRStream, TRUE, uiLanguage, + uiCompareRules, bRightWild))) + { + goto Exit; + } + + if( RC_BAD( rc = 
f_compareCollStreams( + &lStream, &rStream, (bLeftWild || bRightWild) ? 
TRUE : FALSE, + uiLanguage, piResult))) + { + goto Exit; + } + +Exit: + + return( rc); +} + +/*************************************************************************** +Desc: Sets *pbExists to TRUE when pszString compares equal to a search string built by placing an asterisk before and after pszSubString (compared with bRightWild set to TRUE), otherwise FALSE. NOTE(review): asterisks already present in pszSubString are copied unchanged - confirm how the comparison treats them. +****************************************************************************/ +RCODE FLMAPI f_utf8IsSubStr( + const FLMBYTE * pszString, + const FLMBYTE * pszSubString, + FLMUINT uiCompareRules, + FLMUINT uiLanguage, + FLMBOOL * pbExists) +{ + RCODE rc = NE_FLM_OK; + FLMINT iResult = 0; + FLMBYTE * pszSearch = NULL; + FLMUINT uiSubStringLen = f_strlen( (const char *)pszSubString); + + if( RC_BAD( rc = f_alloc( uiSubStringLen + 3, &pszSearch))) + { + goto Exit; + } + + pszSearch[0] = '*'; + f_memcpy( &pszSearch[ 1], pszSubString, uiSubStringLen); + pszSearch[ uiSubStringLen + 1] = '*'; + pszSearch[ uiSubStringLen + 2] = '\0'; + + if( RC_BAD( rc = f_compareUTF8Strings( + pszString, f_strlen( (const char *)pszString), FALSE, pszSearch, + uiSubStringLen + 2, TRUE, uiCompareRules, uiLanguage, &iResult))) + { + goto Exit; + } + + *pbExists = (iResult)?FALSE:TRUE; + +Exit: + + if( pszSearch) + { + f_free( &pszSearch); + } + + return( rc); +} + diff --git a/ftk/src/ftkxml.cpp b/ftk/src/ftkxml.cpp index a1692d1..fa5e43a 100644 --- a/ftk/src/ftkxml.cpp +++ b/ftk/src/ftkxml.cpp @@ -24,7 +24,6 @@ //------------------------------------------------------------------------------ #include "ftksys.h" -#if 0 #define FLM_XML_BASE_CHAR 0x01 #define FLM_XML_IDEOGRAPHIC 0x02 @@ -33,11 +32,6 @@ #define FLM_XML_EXTENDER 0x10 #define FLM_XML_WHITESPACE 0x20 -typedef struct xmlChar -{ - FLMBYTE ucFlags; -} XMLCHAR; - typedef struct { char * pszEntity; @@ -391,73 +385,6 @@ static CHAR_TBL charTbl[] = { 0, 0, 0} }; 
-//FSTATIC RCODE exportUniValue( -// IF_OStream * pOStream, -// FLMUNICODE * puzStr, -// FLMUINT uiStrChars, -// FLMBOOL bEncodeSpecialChars, -// FLMUINT uiIndentCount); - -/**************************************************************************** -Desc: XML 
-****************************************************************************/ -class F_XML : public IF_XML -{ -public: - - F_XML(); - - virtual ~F_XML(); - - RCODE FLMAPI setup( void); - - FLMBOOL FLMAPI isPubidChar( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isQuoteChar( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isWhitespace( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isExtender( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isCombiningChar( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isNameChar( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isNCNameChar( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isIdeographic( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isBaseChar( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isDigit( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isLetter( - FLMUNICODE uChar); - - FLMBOOL FLMAPI isNameValid( - FLMUNICODE * puzName, - FLMBYTE * pszName); - -private: - - void setCharFlag( - FLMUNICODE uLowChar, - FLMUNICODE uHighChar, - FLMUINT16 ui16Flag); - - XMLCHAR * m_pCharTable; -}; - /**************************************************************************** Desc: ****************************************************************************/ @@ -764,4 +691,3 @@ Exit: return( bValid); } -#endif