From c48052bdf722846ca1079fb435b7c540e31ed5c0 Mon Sep 17 00:00:00 2001 From: ahodgkinson Date: Mon, 21 Aug 2006 19:49:24 +0000 Subject: [PATCH] FTK changes. Improvements to collation routines, including an optimization that results in 10x the performance when converting US English characters for collation. Added more collation unit tests. git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@777 0109f412-320b-0410-ab79-c3e0c5ffbbe6 --- ftk/src/ftk.h | 183 +- ftk/src/ftkcoll.cpp | 2124 +++++++++-------- ftk/util/ftktest.cpp | 5328 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 6639 insertions(+), 996 deletions(-) diff --git a/ftk/src/ftk.h b/ftk/src/ftk.h index ac7543e..29e2966 100644 --- a/ftk/src/ftk.h +++ b/ftk/src/ftk.h @@ -530,7 +530,7 @@ #define NE_FLM_SETTING_FILE_INFO 0xC22C ///< 0xC22C - Unexpected error occurred while setting a file's information. #define NE_FLM_IO_PENDING 0xC22D ///< 0xC22D - I/O has not yet completed #define NE_FLM_ASYNC_FAILED 0xC22E ///< 0xC22E - An async I/O operation failed - #define NE_FLM_MISALIGNED_IO 0xC22F ///< 0xC22F - Misaligned buffer, offset, or sector multiple encountered during I/O request + #define NE_FLM_MISALIGNED_IO 0xC22F ///< 0xC22F - Misaligned buffer or offset encountered during I/O request // Stream Errors - These are new @@ -689,18 +689,104 @@ Desc: Collation flags and constants ****************************************************************************/ - #define HAD_SUB_COLLATION 0x01 // Set if had sub-collating values-diacritics - #define HAD_LOWER_CASE 0x02 // Set if you hit a lowercase character - #define COLL_FIRST_SUBSTRING 0x03 // First substring marker - #define COLL_MARKER 0x04 // Marks place of sub-collation + #define F_HAD_SUB_COLLATION 0x01 // Set if had sub-collating values-diacritics + #define F_HAD_LOWER_CASE 0x02 // Set if you hit a lowercase character + #define F_COLL_FIRST_SUBSTRING 0x03 // First substring marker + #define F_COLL_MARKER 0x04 // Marks place of sub-collation - 
#define SC_LOWER 0x00 // Only lowercase characters exist - #define SC_MIXED 0x01 // Lower/uppercase flags follow in next byte - #define SC_UPPER 0x02 // Only upper characters exist - #define SC_SUB_COL 0x03 // Sub-collation follows (diacritics|extCh) + #define F_SC_LOWER 0x00 // Only lowercase characters exist + #define F_SC_MIXED 0x01 // Lower/uppercase flags follow in next byte + #define F_SC_UPPER 0x02 // Only upper characters exist + #define F_SC_SUB_COL 0x03 // Sub-collation follows (diacritics|extCh) - #define COLL_TRUNCATED 0x0C // This key piece has been truncated from original - #define MAX_COL_OPCODE COLL_TRUNCATED + #define F_COLL_TRUNCATED 0x0C // This key piece has been truncated from original + #define F_MAX_COL_OPCODE F_COLL_TRUNCATED + + #define F_CHSASCI 0 // ASCII + #define F_CHSMUL1 1 // Multinational 1 + #define F_CHSMUL2 2 // Multinational 2 + #define F_CHSBOXD 3 // Box drawing + #define F_CHSSYM1 4 // Typographic Symbols + #define F_CHSSYM2 5 // Iconic Symbols + #define F_CHSMATH 6 // Math + #define F_CHMATHX 7 // Math Extension + #define F_CHSGREK 8 // Greek + #define F_CHSHEB 9 // Hebrew + #define F_CHSCYR 10 // Cyrillic + #define F_CHSKANA 11 // Japanese Kana + #define F_CHSUSER 12 // User-defined + #define F_CHSARB1 13 // Arabic + #define F_CHSARB2 14 // Arabic script + + #define F_NCHSETS 15 // # of character sets (excluding asian) + #define F_ACHSETS 0x0E0 // maximum character set value - asian + #define F_ACHSMIN 0x024 // minimum character set value - asian + #define F_ACHCMAX 0x0FE // maximum character value in asian sets + + /**************************************************************************** + Desc: Diacritics + ****************************************************************************/ + + #define F_GRAVE 0 + #define F_CENTERD 1 + #define F_TILDE 2 + #define F_CIRCUM 3 + #define F_CROSSB 4 + #define F_SLASH 5 + #define F_ACUTE 6 + #define F_UMLAUT 7 + #define F_MACRON 8 + + #define F_APOSAB 9 + #define F_APOSBES 10 + 
#define F_APOSBA 11 + + #define F_RING 14 + #define F_DOTA 15 + #define F_DACUTE 16 + #define F_CEDILLA 17 + #define F_OGONEK 18 + #define F_CARON 19 + #define F_STROKE 20 + + #define F_BREVE 22 + #define F_DOTLESI 239 + #define F_DOTLESJ 25 + + #define F_GACUTE 83 // greek acute + #define F_GDIA 84 // greek diaeresis + #define F_GACTDIA 85 // acute diaeresis + #define F_GGRVDIA 86 // grave diaeresis + #define F_GGRAVE 87 // greek grave + #define F_GCIRCM 88 // greek circumflex + #define F_GSMOOTH 89 // smooth breathing + #define F_GROUGH 90 // rough breathing + #define F_GIOTA 91 // iota subscript + #define F_GSMACT 92 // smooth breathing acute + #define F_GRGACT 93 // rough breathing acute + #define F_GSMGRV 94 // smooth breathing grave + #define F_GRGGRV 95 // rough breathing grave + #define F_GSMCIR 96 // smooth breathing circumflex + #define F_GRGCIR 97 // rough breathing circumflex + #define F_GACTIO 98 // acute iota + #define F_GGRVIO 99 // grave iota + #define F_GCIRIO 100 // circumflex iota + #define F_GSMIO 101 // smooth iota + #define F_GRGIO 102 // rough iota + #define F_GSMAIO 103 // smooth acute iota + #define F_GRGAIO 104 // rough acute iota + #define F_GSMGVIO 105 // smooth grave iota + #define F_GRGGVIO 106 // rough grave iota + #define F_GSMCIO 107 // smooth circumflex iota + #define F_GRGCIO 108 // rough circumflex iota + #define F_GHPRIME 81 // high prime + #define F_GLPRIME 82 // low prime + + #define F_RACUTE 200 // russian acute + #define F_RGRAVE 201 // russian grave + #define F_RRTDESC 204 // russian right descender + #define F_ROGONEK 205 // russian ogonek + #define F_RMACRON 206 // russian macron /**************************************************************************** Desc: I/O Flags @@ -875,6 +961,12 @@ void FLMAPI ftkShutdown( void); + /**************************************************************************** + Desc: Global data + ****************************************************************************/ + + extern FLMUINT16 
* gv_pui16USCollationTable; + /**************************************************************************** /// This is a pure virtual base class that other classes inherit from.\ It /// provides methods for reference counting (AddRef, Release). @@ -999,7 +1091,7 @@ }; /**************************************************************************** - Desc: Internal base class + Desc: Internal base class ****************************************************************************/ class FLMEXP F_OSBase { @@ -2627,6 +2719,45 @@ FLMUNICODE uUniChar, FLMUINT16 * pui16WPChar); + FLMBOOL FLMAPI f_depricatedUnicodeToWP( + FLMUNICODE uUniChar, + FLMUINT16 * pui16WPChar); + + FLMUINT16 FLMAPI f_wpUpper( + FLMUINT16 ui16WpChar); + + FLMBOOL FLMAPI f_wpIsUpper( + FLMUINT16 ui16WpChar); + + FLMUINT16 FLMAPI f_wpLower( + FLMUINT16 ui16WpChar); + + FLMBOOL FLMAPI f_breakWPChar( + FLMUINT16 ui16WpChar, + FLMUINT16 * pui16BaseChar, + FLMUINT16 * pui16DiacriticChar); + + FLMBOOL FLMAPI f_combineWPChar( + FLMUINT16 * pui16WpChar, + FLMUINT16 ui16BaseChar, + FLMINT16 ui16DiacriticChar); + + FLMUINT16 FLMAPI f_wpGetCollationImp( + FLMUINT16 ui16WpChar, + FLMUINT uiLanguage); + + FINLINE FLMUINT16 FLMAPI f_wpGetCollation( + FLMUINT16 ui16WpChar, + FLMUINT uiLanguage) + { + if( uiLanguage == FLM_US_LANG) + { + return( gv_pui16USCollationTable[ ui16WpChar]); + } + + return( f_wpGetCollationImp( ui16WpChar, uiLanguage)); + } + RCODE FLMAPI f_wpCheckDoubleCollation( IF_PosIStream * pIStream, FLMBOOL bUnicodeStream, @@ -2636,6 +2767,34 @@ FLMBOOL * pbTwoIntoOne, FLMUINT uiLanguage); + FLMUINT16 FLMAPI f_wpCheckDoubleCollation( + FLMUINT16 * pui16WpChar, + FLMBOOL * pbTwoIntoOne, + const FLMBYTE ** ppucInputStr, + FLMUINT uiLanguage); + + FLMUINT16 FLMAPI f_wpHanToZenkaku( + FLMUINT16 ui16WpChar, + FLMUINT16 ui16NextWpChar, + FLMUINT16 * pui16Zenkaku); + + FLMUINT16 FLMAPI f_wpZenToHankaku( + FLMUINT16 ui16WpChar, + FLMUINT16 * pui16DakutenOrHandakuten); + + FLMUINT FLMAPI f_wpToMixed( + 
FLMBYTE * pucWPStr, + FLMUINT uiWPStrLen, + const FLMBYTE * pucLowUpBitStr, + FLMUINT uiLang); + + RCODE FLMAPI f_asiaParseSubCol( + FLMBYTE * pucWPStr, + FLMUINT * puiWPStrLen, + FLMUINT uiMaxWPBytes, + const FLMBYTE * pucSubColBuf, + FLMUINT * puiSubColBitPos); + RCODE FLMAPI f_asiaColStr2WPStr( const FLMBYTE * pucColStr, FLMUINT uiColStrLen, diff --git a/ftk/src/ftkcoll.cpp b/ftk/src/ftkcoll.cpp index 6fe4cb4..40ff501 100644 --- a/ftk/src/ftkcoll.cpp +++ b/ftk/src/ftkcoll.cpp @@ -25,124 +25,36 @@ #include "ftksys.h" -// Character set #'s are same as high byte values -// except for algorithmic set. - -#define CHSASCI 0 // ASCII -#define CHSMUL1 1 // Multinational 1 -#define CHSMUL2 2 // Multinational 2 -#define CHSBOXD 3 // Box drawing -#define CHSSYM1 4 // Typographic Symbols -#define CHSSYM2 5 // Iconic Symbols -#define CHSMATH 6 // Math -#define CHMATHX 7 // Math Extension -#define CHSGREK 8 // Greek -#define CHSHEB 9 // Hebrew -#define CHSCYR 10 // Cyrillic -#define CHSKANA 11 // Japanese Kana -#define CHSUSER 12 // User-defined -#define CHSARB1 13 // Arabic -#define CHSARB2 14 // Arabic script - -#define NCHSETS 15 // # of character sets (excluding Asian) -#define WP_MAX_CAR60_SIZE NCHSETS -#define ACHSETS 0x0E0 // Maximum character set value - Asian -#define ACHSMIN 0x024 // Minimum character set value - Asian -#define ACHCMAX 0x0FE // Maxmimum character value in Asian sets - // Collating Sequence Equates -#define COLLS 32 // first collating number (space/end of line) -#define COLS0 255 // graphics/misc - chars without a collate value -#define COLS1 (COLLS + 9) // quotes -#define COLS2 (COLS1 + 5) // parens -#define COLS3 (COLS2 + 6) // money -#define COLS4 (COLS3 + 6) // math ops -#define COLS5 (COLS4 + 8) // math others -#define COLS6 (COLS5 + 14) // others: %#&@\_|~ -#define COLS7 (COLS6 + 13) // greek -#define COLS8 (COLS7 + 25) // numbers -#define COLS9 (COLS8 + 10) // alphabet -#define COLS10 (COLS9 + 60) // cyrillic -#define COLS10h (COLS9 + 42) // 
hebrew - writes over european & cyrilic -#define COLS10a (COLS10h + 28) // arabic - inclusive from 198(C6)-252(FC) -#define COLS11 253 // End of list - arabic goes to the end -#define COLS0_ARABIC COLS11 // Set if arabic accent marking -#define COLS0_HEBREW COLS11 // Set if hebrew accent marking -#define COLS_ASIAN_MARKS 0x140 -#define COLS_ASIAN_MARK_VAL 0x40 // Without 0x100 +#define COLLS 32 // first collating number (space/end of line) +#define COLS0 255 // graphics/misc - chars without a collate value +#define COLS1 (COLLS + 9) // quotes +#define COLS2 (COLS1 + 5) // parens +#define COLS3 (COLS2 + 6) // money +#define COLS4 (COLS3 + 6) // math ops +#define COLS5 (COLS4 + 8) // math others +#define COLS6 (COLS5 + 14) // others: %#&@\_|~ +#define COLS7 (COLS6 + 13) // greek +#define COLS8 (COLS7 + 25) // numbers +#define COLS9 (COLS8 + 10) // alphabet +#define COLS10 (COLS9 + 60) // cyrillic +#define COLS10h (COLS9 + 42) // hebrew - writes over european & cyrillic +#define COLS10a (COLS10h + 28) // arabic - inclusive from 198(C6)-252(FC) +#define COLS11 253 // End of list - arabic goes to the end +#define COLS0_ARABIC COLS11 // Set if arabic accent marking +#define COLS0_HEBREW COLS11 // Set if hebrew accent marking +#define COLS_ASIAN_MARKS 0x140 +#define COLS_ASIAN_MARK_VAL 0x40 // Without 0x100 -#define SET_CASE_BIT 0x01 -#define SET_KATAKANA_BIT 0x01 -#define SET_WIDTH_BIT 0x02 +#define SET_CASE_BIT 0x01 +#define SET_KATAKANA_BIT 0x01 +#define SET_WIDTH_BIT 0x02 -#define UNK_UNICODE_CODE 0xFFFE +#define UNK_UNICODE_CODE 0xFFFE -#define MAX_SUBCOL_BUF (500) -#define MAX_CASE_BYTES (150) - -// Definitions for diacritics. 
- -#define grave 0 -#define centerd 1 -#define tilde 2 -#define circum 3 -#define crossb 4 -#define slash 5 -#define acute 6 -#define umlaut 7 -#define macron 8 - -#define aposab 9 -#define aposbes 10 -#define aposba 11 - -#define ring 14 -#define dota 15 -#define dacute 16 -#define cedilla 17 -#define ogonek 18 -#define caron 19 -#define stroke 20 - -#define breve 22 -#define dotlesi 239 -#define dotlesj 25 - -#define gacute 83 // greek acute -#define gdia 84 // greek diaeresis -#define gactdia 85 // acute diaeresis -#define ggrvdia 86 // grave diaeresis -#define ggrave 87 // greek grave -#define gcircm 88 // greek circumflex -#define gsmooth 89 // smooth breathing -#define grough 90 // rough breathing -#define giota 91 // iota subscript -#define gsmact 92 // smooth breathing acute -#define grgact 93 // rough breathing acute -#define gsmgrv 94 // smooth breathing grave -#define grggrv 95 // rough breathing grave -#define gsmcir 96 // smooth breathing circumflex -#define grgcir 97 // rough breathing circumflex -#define gactio 98 // acute iota -#define ggrvio 99 // grave iota -#define gcirio 100 // circumflex iota -#define gsmio 101 // smooth iota -#define grgio 102 // rough iota -#define gsmaio 103 // smooth acute iota -#define grgaio 104 // rough acute iota -#define gsmgvio 105 // smooth grave iota -#define grggvio 106 // rough grave iota -#define gsmcio 107 // smooth circumflex iota -#define grgcio 108 // rough circumflex iota -#define ghprime 81 // high prime -#define glprime 82 // low prime - -#define racute 200 // russian acute -#define rgrave 201 // russian grave -#define rrtdesc 204 // russian right descender -#define rogonek 205 // russian ogonek -#define rmacron 206 // russian macron +#define MAX_SUBCOL_BUF (500) +#define MAX_CASE_BYTES (150) #define ASCTBLLEN 95 #define MNTBLLEN 219 @@ -234,7 +146,7 @@ #define FIXUP_AREA_SIZE 24 // Number of characters to fix up -FLMUINT16 flmWPAsiaGetCollation( +FSTATIC FLMUINT16 flmWPAsiaGetCollation( FLMUINT16 
ui16WpChar, FLMUINT16 ui16NextWpChar, FLMUINT16 ui16PrevColValue, @@ -243,57 +155,11 @@ FLMUINT16 flmWPAsiaGetCollation( FLMBYTE * pucCaseBits, FLMBOOL bUppercaseFlag); -FLMUINT16 flmWPGetCollation( - FLMUINT16 ui16WpChar, - FLMUINT uiLanguage); - -FLMUINT16 flmWPUpper( - FLMUINT16 ui16WpChar); - -FLMUINT16 flmWPLower( - FLMUINT16 ui16WpChar); - -FLMBOOL flmWPIsUpper( - FLMUINT16 ui16WpChar); - -FLMBOOL flmWPBrkcar( - FLMUINT16 ui16WpChar, - FLMUINT16 * pui16BaseChar, - FLMUINT16 * pui16DiacriticChar); - -FLMUINT16 flmWPGetSubCol( +FSTATIC FLMUINT16 flmWPGetSubCol( FLMUINT16 ui16WPValue, FLMUINT16 ui16ColValue, FLMUINT uiLanguage); -typedef struct -{ - FLMBYTE base; - FLMBYTE diacrit; -} BASE_DIACRIT_TABLE; - -typedef struct -{ - FLMUINT16 char_count; // # of characters in table - FLMUINT16 start_char; // start char. - BASE_DIACRIT_TABLE * table; - -} BASE_DIACRIT; - -typedef struct -{ - FLMBYTE key; // character key to search on - FLMBYTE * charPtr; // character pointer for matched key -} TBL_B_TO_BP; - -typedef struct -{ - FLMBYTE ByteValue; - FLMUINT16 WordValue; -} BYTE_WORD_TBL; - -// Static functions - FSTATIC RCODE flmWPCmbSubColBuf( FLMBYTE * pucWPStr, FLMUINT * puiWPStrLen, @@ -302,28 +168,6 @@ FSTATIC RCODE flmWPCmbSubColBuf( FLMBOOL bHebrewArabic, FLMUINT * puiSubColBitPos); -FSTATIC FLMUINT flmWPToMixed( - FLMBYTE * pucWPStr, - FLMUINT uiWPStrLen, - const FLMBYTE * pucLowUpBitStr, - FLMUINT uiLang); - -FSTATIC FLMUINT16 flmWPZenToHankaku( - FLMUINT16 ui16WpChar, - FLMUINT16 * pui16DakutenOrHandakuten); - -FSTATIC FLMUINT16 flmWPHanToZenkaku( - FLMUINT16 ui16WpChar, - FLMUINT16 ui16NextWpChar, - FLMUINT16 * pui16Zenkaku); - -FSTATIC RCODE flmAsiaParseSubCol( - FLMBYTE * pucWPStr, - FLMUINT * puiWPStrLen, - FLMUINT uiMaxWPBytes, - const FLMBYTE * pucSubColBuf, - FLMUINT * puiSubColBitPos); - FSTATIC RCODE flmAsiaParseCase( FLMBYTE * pucWPStr, FLMUINT * puiWPStrLen, @@ -339,6 +183,35 @@ static FLMUINT gv_uiMinUniChar = 0; static FLMUINT gv_uiMaxUniChar = 
0; static FLMUINT gv_uiMinWPChar = 0; static FLMUINT gv_uiMaxWPChar = 0; +FLMUINT16 * gv_pui16USCollationTable = NULL; + +// Typedefs + +typedef struct +{ + FLMBYTE base; + FLMBYTE diacrit; +} BASE_DIACRIT_TABLE; + +typedef struct +{ + FLMUINT16 char_count; // # of characters in table + FLMUINT16 start_char; // start char. + BASE_DIACRIT_TABLE * table; + +} BASE_DIACRIT; + +typedef struct +{ + FLMBYTE key; // character key to search on + FLMBYTE * charPtr; // character pointer for matched key +} TBL_B_TO_BP; + +typedef struct +{ + FLMBYTE ByteValue; + FLMUINT16 WordValue; +} BYTE_WORD_TBL; // Collation tables @@ -399,222 +272,222 @@ Notes: Diacritical char is always in same set as composed char ****************************************************************************/ static BASE_DIACRIT_TABLE fwp_ml1c_table[] = { - {'A',acute}, - {'a',acute}, - {'A',circum}, - {'a',circum}, - {'A',umlaut}, - {'a',umlaut}, - {'A',grave}, - {'a',grave}, - {'A',ring}, - {'a',ring}, - {0xff,0xff}, // no AE diagraph - {0xff,0xff}, // no ae diagraph - {'C',cedilla}, - {'c',cedilla}, - {'E',acute}, - {'e',acute}, - {'E',circum}, - {'e',circum}, - {'E',umlaut}, - {'e',umlaut}, - {'E',grave}, - {'e',grave}, - {'I',acute}, - {dotlesi,acute}, - {'I',circum}, - {dotlesi,circum}, - {'I',umlaut}, - {dotlesi,umlaut}, - {'I',grave}, - {dotlesi,grave}, - {'N',tilde}, - {'n',tilde}, - {'O',acute}, - {'o',acute}, - {'O',circum}, - {'o',circum}, - {'O',umlaut}, - {'o',umlaut}, - {'O',grave}, - {'o',grave}, - {'U',acute}, - {'u',acute}, - {'U',circum}, - {'u',circum}, - {'U',umlaut}, - {'u',umlaut}, - {'U',grave}, - {'u',grave}, - {'Y',umlaut}, - {'y',umlaut}, - {'A',tilde}, - {'a',tilde}, - {'D',crossb}, - {'d',crossb}, - {'O',slash}, - {'o',slash}, - {'O',tilde}, - {'o',tilde}, - {'Y',acute}, - {'y',acute}, - {0xff,0xff}, // no eth - {0xff,0xff}, // no eth - {0xff,0xff}, // no Thorn - {0xff,0xff}, // no Thorn - {'A',breve}, - {'a',breve}, - {'A',macron}, - {'a',macron}, - {'A',ogonek}, - 
{'a',ogonek}, - {'C',acute}, - {'c',acute}, - {'C',caron}, - {'c',caron}, - {'C',circum}, - {'c',circum}, - {'C',dota}, - {'c',dota}, - {'D',caron}, - {'d',caron}, - {'E',caron}, - {'e',caron}, - {'E',dota}, - {'e',dota}, - {'E',macron}, - {'e',macron}, - {'E',ogonek}, - {'e',ogonek}, - {'G',acute}, - {'g',acute}, - {'G',breve}, - {'g',breve}, - {'G',caron}, - {'g',caron}, - {'G',cedilla}, - {'g',aposab}, - {'G',circum}, - {'g',circum}, - {'G',dota}, - {'g',dota}, - {'H',circum}, - {'h',circum}, - {'H',crossb}, - {'h',crossb}, - {'I',dota}, - {dotlesi,dota}, - {'I',macron}, - {dotlesi,macron}, - {'I',ogonek}, - {'i',ogonek}, - {'I',tilde}, - {dotlesi,tilde}, - {0xff,0xff}, // no IJ digraph - {0xff,0xff}, // no ij digraph - {'J',circum}, - {dotlesj,circum}, - {'K',cedilla}, - {'k',cedilla}, - {'L',acute}, - {'l',acute}, - {'L',caron}, - {'l',caron}, - {'L',cedilla}, - {'l',cedilla}, - {'L',centerd}, - {'l',centerd}, - {'L',stroke}, - {'l',stroke}, - {'N',acute}, - {'n',acute}, - {'N',aposba}, - {'n',aposba}, - {'N',caron}, - {'n',caron}, - {'N',cedilla}, - {'n',cedilla}, - {'O',dacute}, - {'o',dacute}, - {'O',macron}, - {'o',macron}, - {0xff,0xff}, // OE digraph - {0xff,0xff}, // oe digraph - {'R',acute}, - {'r',acute}, - {'R',caron}, - {'r',caron}, - {'R',cedilla}, - {'r',cedilla}, - {'S',acute}, - {'s',acute}, - {'S',caron}, - {'s',caron}, - {'S',cedilla}, - {'s',cedilla}, - {'S',circum}, - {'s',circum}, - {'T',caron}, - {'t',caron}, - {'T',cedilla}, - {'t',cedilla}, - {'T',crossb}, - {'t',crossb}, - {'U',breve}, - {'u',breve}, - {'U',dacute}, - {'u',dacute}, - {'U',macron}, - {'u',macron}, - {'U',ogonek}, - {'u',ogonek}, - {'U',ring}, - {'u',ring}, - {'U',tilde}, - {'u',tilde}, - {'W',circum}, - {'w',circum}, - {'Y',circum}, - {'y',circum}, - {'Z',acute}, - {'z',acute}, - {'Z',caron}, - {'z',caron}, - {'Z',dota}, - {'z',dota}, - {0xff,0xff}, // no Eng - {0xff,0xff}, // no eng - {'D',macron}, - {'d',macron}, - {'L',macron}, - {'l',macron}, - {'N',macron}, - 
{'n',macron}, - {'R',grave}, - {'r',grave}, - {'S',macron}, - {'s',macron}, - {'T',macron}, - {'t',macron}, - {'Y',breve}, - {'y',breve}, - {'Y',grave}, - {'y',grave}, - {'D',aposbes}, - {'d',aposbes}, - {'O',aposbes}, - {'o',aposbes}, - {'U',aposbes}, - {'u',aposbes}, - {'E',breve}, - {'e',breve}, - {'I',breve}, - {dotlesi,breve}, - {0xff,0xff}, // no dotless I - {0xff,0xff}, // no dotless i - {'O',breve}, - {'o',breve} + {'A', F_ACUTE}, + {'a', F_ACUTE}, + {'A', F_CIRCUM}, + {'a', F_CIRCUM}, + {'A', F_UMLAUT}, + {'a', F_UMLAUT}, + {'A', F_GRAVE}, + {'a', F_GRAVE}, + {'A', F_RING}, + {'a', F_RING}, + {0xff, 0xff}, // no AE diagraph + {0xff, 0xff}, // no ae diagraph + {'C', F_CEDILLA}, + {'c', F_CEDILLA}, + {'E', F_ACUTE}, + {'e', F_ACUTE}, + {'E', F_CIRCUM}, + {'e', F_CIRCUM}, + {'E', F_UMLAUT}, + {'e', F_UMLAUT}, + {'E', F_GRAVE}, + {'e', F_GRAVE}, + {'I', F_ACUTE}, + {F_DOTLESI, F_ACUTE}, + {'I', F_CIRCUM}, + {F_DOTLESI, F_CIRCUM}, + {'I', F_UMLAUT}, + {F_DOTLESI, F_UMLAUT}, + {'I', F_GRAVE}, + {F_DOTLESI, F_GRAVE}, + {'N', F_TILDE}, + {'n', F_TILDE}, + {'O', F_ACUTE}, + {'o', F_ACUTE}, + {'O', F_CIRCUM}, + {'o', F_CIRCUM}, + {'O', F_UMLAUT}, + {'o', F_UMLAUT}, + {'O', F_GRAVE}, + {'o', F_GRAVE}, + {'U', F_ACUTE}, + {'u', F_ACUTE}, + {'U', F_CIRCUM}, + {'u', F_CIRCUM}, + {'U', F_UMLAUT}, + {'u', F_UMLAUT}, + {'U', F_GRAVE}, + {'u', F_GRAVE}, + {'Y', F_UMLAUT}, + {'y', F_UMLAUT}, + {'A', F_TILDE}, + {'a', F_TILDE}, + {'D', F_CROSSB}, + {'d', F_CROSSB}, + {'O', F_SLASH}, + {'o', F_SLASH}, + {'O', F_TILDE}, + {'o', F_TILDE}, + {'Y', F_ACUTE}, + {'y', F_ACUTE}, + {0xff, 0xff}, // no eth + {0xff, 0xff}, // no eth + {0xff, 0xff}, // no Thorn + {0xff, 0xff}, // no Thorn + {'A', F_BREVE}, + {'a', F_BREVE}, + {'A', F_MACRON}, + {'a', F_MACRON}, + {'A', F_OGONEK}, + {'a', F_OGONEK}, + {'C', F_ACUTE}, + {'c', F_ACUTE}, + {'C', F_CARON}, + {'c', F_CARON}, + {'C', F_CIRCUM}, + {'c', F_CIRCUM}, + {'C', F_DOTA}, + {'c', F_DOTA}, + {'D', F_CARON}, + {'d', F_CARON}, + {'E', 
F_CARON}, + {'e', F_CARON}, + {'E', F_DOTA}, + {'e', F_DOTA}, + {'E', F_MACRON}, + {'e', F_MACRON}, + {'E', F_OGONEK}, + {'e', F_OGONEK}, + {'G', F_ACUTE}, + {'g', F_ACUTE}, + {'G', F_BREVE}, + {'g', F_BREVE}, + {'G', F_CARON}, + {'g', F_CARON}, + {'G', F_CEDILLA}, + {'g', F_APOSAB}, + {'G', F_CIRCUM}, + {'g', F_CIRCUM}, + {'G', F_DOTA}, + {'g', F_DOTA}, + {'H', F_CIRCUM}, + {'h', F_CIRCUM}, + {'H', F_CROSSB}, + {'h', F_CROSSB}, + {'I', F_DOTA}, + {F_DOTLESI, F_DOTA}, + {'I', F_MACRON}, + {F_DOTLESI, F_MACRON}, + {'I', F_OGONEK}, + {'i', F_OGONEK}, + {'I', F_TILDE}, + {F_DOTLESI, F_TILDE}, + {0xff, 0xff}, // no IJ digraph + {0xff, 0xff}, // no ij digraph + {'J', F_CIRCUM}, + {F_DOTLESJ, F_CIRCUM}, + {'K', F_CEDILLA}, + {'k', F_CEDILLA}, + {'L', F_ACUTE}, + {'l', F_ACUTE}, + {'L', F_CARON}, + {'l', F_CARON}, + {'L', F_CEDILLA}, + {'l', F_CEDILLA}, + {'L', F_CENTERD}, + {'l', F_CENTERD}, + {'L', F_STROKE}, + {'l', F_STROKE}, + {'N', F_ACUTE}, + {'n', F_ACUTE}, + {'N', F_APOSBA}, + {'n', F_APOSBA}, + {'N', F_CARON}, + {'n', F_CARON}, + {'N', F_CEDILLA}, + {'n', F_CEDILLA}, + {'O', F_DACUTE}, + {'o', F_DACUTE}, + {'O', F_MACRON}, + {'o', F_MACRON}, + {0xff, 0xff}, // OE digraph + {0xff, 0xff}, // oe digraph + {'R', F_ACUTE}, + {'r', F_ACUTE}, + {'R', F_CARON}, + {'r', F_CARON}, + {'R', F_CEDILLA}, + {'r', F_CEDILLA}, + {'S', F_ACUTE}, + {'s', F_ACUTE}, + {'S', F_CARON}, + {'s', F_CARON}, + {'S', F_CEDILLA}, + {'s', F_CEDILLA}, + {'S', F_CIRCUM}, + {'s', F_CIRCUM}, + {'T', F_CARON}, + {'t', F_CARON}, + {'T', F_CEDILLA}, + {'t', F_CEDILLA}, + {'T', F_CROSSB}, + {'t', F_CROSSB}, + {'U', F_BREVE}, + {'u', F_BREVE}, + {'U', F_DACUTE}, + {'u', F_DACUTE}, + {'U', F_MACRON}, + {'u', F_MACRON}, + {'U', F_OGONEK}, + {'u', F_OGONEK}, + {'U', F_RING}, + {'u', F_RING}, + {'U', F_TILDE}, + {'u', F_TILDE}, + {'W', F_CIRCUM}, + {'w', F_CIRCUM}, + {'Y', F_CIRCUM}, + {'y', F_CIRCUM}, + {'Z', F_ACUTE}, + {'z', F_ACUTE}, + {'Z', F_CARON}, + {'z', F_CARON}, + {'Z', F_DOTA}, + {'z', 
F_DOTA}, + {0xff, 0xff}, // no Eng + {0xff, 0xff}, // no eng + {'D', F_MACRON}, + {'d', F_MACRON}, + {'L', F_MACRON}, + {'l', F_MACRON}, + {'N', F_MACRON}, + {'n', F_MACRON}, + {'R', F_GRAVE}, + {'r', F_GRAVE}, + {'S', F_MACRON}, + {'s', F_MACRON}, + {'T', F_MACRON}, + {'t', F_MACRON}, + {'Y', F_BREVE}, + {'y', F_BREVE}, + {'Y', F_GRAVE}, + {'y', F_GRAVE}, + {'D', F_APOSBES}, + {'d', F_APOSBES}, + {'O', F_APOSBES}, + {'o', F_APOSBES}, + {'U', F_APOSBES}, + {'u', F_APOSBES}, + {'E', F_BREVE}, + {'e', F_BREVE}, + {'I', F_BREVE}, + {F_DOTLESI, F_BREVE}, + {0xff, 0xff}, // no dotless I + {0xff, 0xff}, // no dotless i + {'O', F_BREVE}, + {'o', F_BREVE} }; /**************************************************************************** @@ -638,169 +511,169 @@ Notes: Diacritical char is always in same set as composed char ****************************************************************************/ static BASE_DIACRIT_TABLE fwp_grk_c_table[] = { - { 0, ghprime }, // ALPHA High Prime - { 1, gacute }, // alpha acute - { 10, ghprime }, // EPSILON High Prime - { 11, gacute }, // epsilon Acute - { 14, ghprime }, // ETA High Prime - { 15, gacute }, // eta Acute - { 18, ghprime }, // IOTA High Prime - { 19, gacute }, // iota Acute - { 0xFF, 0xFF }, // IOTA Diaeresis - { 19, gdia }, // iota Diaeresis - { 30, ghprime }, // OMICRON High Prime - { 31, gacute }, // omicron Acute - { 42, ghprime }, // UPSILON High Prime - { 43, gacute }, // upsilon Acute - { 0xFF, 0xFF }, // UPSILON Diaeresis - { 43,gdia }, // upsilon Diaeresis - { 50,ghprime }, // OMEGA High Prime - { 51,gacute }, // omega Acute - { 0xFF, 0xFF }, // epsilon (Variant) - { 0xFF, 0xFF }, // theta (Variant) - { 0xFF, 0xFF }, // kappa (Variant) - { 0xFF, 0xFF }, // pi (Variant) - { 0xFF, 0xFF }, // rho (Variant) - { 0xFF, 0xFF }, // sigma (Variant) - { 0xFF, 0xFF }, // UPSILON (Variant) - { 0xFF, 0xFF }, // phi (Variant) - { 0xFF, 0xFF }, // omega (Variant) - { 0xFF, 0xFF }, // Greek Question Mark - { 0xFF, 0xFF }, // Greek 
Semicolon - { 0xFF, 0xFF }, // High Prime - { 0xFF, 0xFF }, // Low Prime - { 0xFF, 0xFF }, // Acute (Greek) - { 0xFF, 0xFF }, // Diaeresis (Greek) - { gacute,gdia }, // Acute Diaeresis - { ggrave, gdia }, // Grave Diaeresis - { 0xFF, 0xFF }, // Grave (Greek) - { 0xFF, 0xFF }, // Circumflex (Greek) - { 0xFF, 0xFF }, // Smooth Breathing - { 0xFF, 0xFF }, // Rough Breathing - { 0xFF, 0xFF }, // Iota Subscript - { gsmooth, gacute }, // Smooth Breathing Acute - { grough, gacute }, // Rough Breathing Acute - { gsmooth, ggrave }, // Smooth Breathing Grave - { grough, ggrave }, // Rough Breathing Grave - { gsmooth, gcircm }, // Smooth Breathing Circumflex - { grough, gcircm }, // Rough Breathing Circumflex - { gacute, giota }, // Acute w/Iota Subscript - { ggrave, giota }, // Grave w/Iota Subscript - { gcircm, giota }, // Circumflex w/Iota Subscript - { gsmooth, giota }, // Smooth Breathing w/Iota Subscript - { grough, giota }, // Rough Breathing w/Iota Subscript - { gsmact, giota }, // Smooth Breathing Acute w/Iota Subscript - { grgact, giota }, // Rough Breathing Acute w/Iota Subscript - { gsmgrv, giota }, // Smooth Breathing Grave w/Iota Subscript - { grggrv, giota }, // Rough Breathing Grave w/Iota Subscript - { gsmcir, giota }, // Smooth Breathing Circumflex w/Iota Sub - { grgcir, giota }, // Rough Breathing Circumflex w/Iota Sub - { 1, ggrave }, // alpha Grave - { 1, gcircm }, // alpha Circumflex - { 1, giota }, // alpha w/Iota - { 1, gactio }, // alpha Acute w/Iota - { 1, ggrvio }, // alpha Grave w/Iota - { 1, gcirio }, // alpha Circumflex w/Iota - { 1, gsmooth }, // alpha Smooth - { 1, gsmact }, // alpha Smooth Acute - { 1, gsmgrv }, // alpha Smooth Grave - { 1, gsmcir }, // alpha Smooth Circumflex - { 1, gsmio }, // alpha Smooth w/Iota - { 1, gsmaio }, // alpha Smooth Acute w/Iota - { 1, gsmgvio }, // alpha Smooth Grave w/Iota - { 1, gsmcio }, // alpha Smooth Circumflex w/Iota - { 1, grough }, // alpha Rough - { 1, grgact }, // alpha Rough Acute - { 1, grggrv }, 
// alpha Rough Grave - { 1, grgcir }, // alpha Rough Circumflex - { 1, grgio }, // alpha Rough w/Iota - { 1, grgaio }, // alpha Rough Acute w/Iota - { 1, grggvio }, // alpha Rough Grave w/Iota - { 1, grgcio }, // alpha Rough Circumflex w/Iota - { 11, ggrave }, // epsilon Grave - { 11, gsmooth }, // epsilon Smooth - { 11, gsmact }, // epsilon Smooth Acute - { 11, gsmgrv }, // epsilon Smooth Grave - { 11, grough }, // epsilon Rough - { 11, grgact }, // epsilon Rough Acute - { 11, grggrv }, // epsilon Rough Grave - { 15, ggrave }, // eta Grave - { 15, gcircm }, // eta Circumflex - { 15, giota }, // eta w/Iota - { 15, gactio }, // eta Acute w/Iota - { 15, ggrvio }, // eta Grave w/Iota - { 15, gcirio }, // eta Circumflex w/Iota - { 15, gsmooth }, // eta Smooth - { 15, gsmact }, // eta Smooth Acute - { 15, gsmgrv }, // eta Smooth Grave - { 15, gsmcir }, // eta Smooth Circumflex - { 15, gsmio }, // eta Smooth w/Iota - { 15, gsmaio }, // eta Smooth Acute w/Iota - { 15, gsmgvio }, // eta Smooth Grave w/Iota - { 15, gsmcio }, // eta Smooth Circumflex w/Iota - { 15, grough }, // eta Rough - { 15, grgact }, // eta Rough Acute - { 15, grggrv }, // eta Rough Grave - { 15, grgcir }, // eta Rough Circumflex - { 15, grgio }, // eta Rough w/Iota - { 15, grgaio }, // eta Rough Acute w/Iota - { 15, grggvio }, // eta Rough Grave w/Iota - { 15, grgcio }, // eta Rough Circumflex w/Iota - { 19, ggrave }, // iota Grave - { 19, gcircm }, // iota Circumflex - { 19, gactdia }, // iota Acute Diaeresis - { 19, ggrvdia }, // iota Grave Diaeresis - { 19, gsmooth }, // iota Smooth - { 19, gsmact }, // iota Smooth Acute - { 19, gsmgrv }, // iota Smooth Grave - { 19, gsmcir }, // iota Smooth Circumflex - { 19, grough }, // iota Rough - { 19, grgact }, // iota Rough Acute - { 19, grggrv }, // iota Rough Grave - { 19, grgcir }, // iota Rough Circumflex - { 31, ggrave }, // omicron Grave - { 31, gsmooth }, // omicron Smooth - { 31, gsmact }, // omicron Smooth Acute - { 31, gsmgrv }, // omicron Smooth 
Grave - { 31, grough }, // omicron Rough - { 31, grgact }, // omicron Rough Acute - { 31, grggrv }, // omicron Rough Grave - { 0xFF, 0xFF }, // rho rough - { 0xFF, 0xFF }, // rho smooth - { 43, ggrave }, // upsilon Grave - { 43, gcircm }, // upsilon Circumflex - { 43, gactdia }, // upsilon Acute Diaeresis - { 43, ggrvdia }, // upsilon Grave Diaeresis - { 43, gsmooth }, // upsilon Smooth - { 43, gsmact }, // upsilon Smooth Acute - { 43, gsmgrv }, // upsilon Smooth Grave - { 43, gsmcir }, // upsilon Smooth Circumflex - { 43, grough }, // upsilon Rough - { 43, grgact }, // upsilon Rough Acute - { 43, grggrv }, // upsilon Rough Grave - { 43, grgcir }, // upsilon Rough Circumflex - { 51, ggrave }, // omega Grave - { 51, gcircm }, // omega Circumflex - { 51, giota }, // omega w/Iota - { 51, gactio }, // omega Acute w/Iota - { 51, ggrvio }, // omega Grave w/Iota - { 51, gcirio }, // omega Circumflex w/Iota - { 51, gsmooth }, // omega Smooth - { 51, gsmact }, // omega Smooth Acute - { 51, gsmgrv }, // omega Smooth Grave - { 51, gsmcir }, // omega Smooth Circumflex - { 51, gsmio }, // omega Smooth w/Iota - { 51, gsmaio }, // omega Smooth Acute w/Iota - { 51, gsmgvio }, // omega Smooth Grave w/Iota - { 51, gsmcio }, // omega Smooth Circumflex w/Iota - { 51, grough }, // omega Rough - { 51, grgact }, // omega Rough Acute - { 51, grggrv }, // omega Rough Grave - { 51, grgcir }, // omega Rough Circumflex - { 51, grgio }, // omega Rough w/Iota - { 51, grgaio }, // omega Rough Acute w/Iota - { 51, grggvio }, // omega Rough Grave w/Iota - { 51, grgcio} // omega Rough Circumflex w/Iota + { 0, F_GHPRIME }, // ALPHA High Prime + { 1, F_GACUTE }, // alpha acute + { 10, F_GHPRIME }, // EPSILON High Prime + { 11, F_GACUTE }, // epsilon Acute + { 14, F_GHPRIME }, // ETA High Prime + { 15, F_GACUTE }, // eta Acute + { 18, F_GHPRIME }, // IOTA High Prime + { 19, F_GACUTE }, // iota Acute + { 0xFF, 0xFF }, // IOTA Diaeresis + { 19, F_GDIA }, // iota Diaeresis + { 30, F_GHPRIME }, // OMICRON 
High Prime + { 31, F_GACUTE }, // omicron Acute + { 42, F_GHPRIME }, // UPSILON High Prime + { 43, F_GACUTE }, // upsilon Acute + { 0xFF, 0xFF }, // UPSILON Diaeresis + { 43, F_GDIA }, // upsilon Diaeresis + { 50, F_GHPRIME }, // OMEGA High Prime + { 51, F_GACUTE }, // omega Acute + { 0xFF, 0xFF }, // epsilon (Variant) + { 0xFF, 0xFF }, // theta (Variant) + { 0xFF, 0xFF }, // kappa (Variant) + { 0xFF, 0xFF }, // pi (Variant) + { 0xFF, 0xFF }, // rho (Variant) + { 0xFF, 0xFF }, // sigma (Variant) + { 0xFF, 0xFF }, // UPSILON (Variant) + { 0xFF, 0xFF }, // phi (Variant) + { 0xFF, 0xFF }, // omega (Variant) + { 0xFF, 0xFF }, // Greek Question Mark + { 0xFF, 0xFF }, // Greek Semicolon + { 0xFF, 0xFF }, // High Prime + { 0xFF, 0xFF }, // Low Prime + { 0xFF, 0xFF }, // Acute (Greek) + { 0xFF, 0xFF }, // Diaeresis (Greek) + { F_GACUTE, F_GDIA }, // Acute Diaeresis + { F_GGRAVE, F_GDIA }, // Grave Diaeresis + { 0xFF, 0xFF }, // Grave (Greek) + { 0xFF, 0xFF }, // Circumflex (Greek) + { 0xFF, 0xFF }, // Smooth Breathing + { 0xFF, 0xFF }, // Rough Breathing + { 0xFF, 0xFF }, // Iota Subscript + { F_GSMOOTH, F_GACUTE }, // Smooth Breathing Acute + { F_GROUGH, F_GACUTE }, // Rough Breathing Acute + { F_GSMOOTH, F_GGRAVE }, // Smooth Breathing Grave + { F_GROUGH, F_GGRAVE }, // Rough Breathing Grave + { F_GSMOOTH, F_GCIRCM }, // Smooth Breathing Circumflex + { F_GROUGH, F_GCIRCM }, // Rough Breathing Circumflex + { F_GACUTE, F_GIOTA }, // Acute w/Iota Subscript + { F_GGRAVE, F_GIOTA }, // Grave w/Iota Subscript + { F_GCIRCM, F_GIOTA }, // Circumflex w/Iota Subscript + { F_GSMOOTH, F_GIOTA }, // Smooth Breathing w/Iota Subscript + { F_GROUGH, F_GIOTA }, // Rough Breathing w/Iota Subscript + { F_GSMACT, F_GIOTA }, // Smooth Breathing Acute w/Iota Subscript + { F_GRGACT, F_GIOTA }, // Rough Breathing Acute w/Iota Subscript + { F_GSMGRV, F_GIOTA }, // Smooth Breathing Grave w/Iota Subscript + { F_GRGGRV, F_GIOTA }, // Rough Breathing Grave w/Iota Subscript + { F_GSMCIR, F_GIOTA }, 
// Smooth Breathing Circumflex w/Iota Sub + { F_GRGCIR, F_GIOTA }, // Rough Breathing Circumflex w/Iota Sub + { 1, F_GGRAVE }, // alpha Grave + { 1, F_GCIRCM }, // alpha Circumflex + { 1, F_GIOTA }, // alpha w/Iota + { 1, F_GACTIO }, // alpha Acute w/Iota + { 1, F_GGRVIO }, // alpha Grave w/Iota + { 1, F_GCIRIO }, // alpha Circumflex w/Iota + { 1, F_GSMOOTH }, // alpha Smooth + { 1, F_GSMACT }, // alpha Smooth Acute + { 1, F_GSMGRV }, // alpha Smooth Grave + { 1, F_GSMCIR }, // alpha Smooth Circumflex + { 1, F_GSMIO }, // alpha Smooth w/Iota + { 1, F_GSMAIO }, // alpha Smooth Acute w/Iota + { 1, F_GSMGVIO }, // alpha Smooth Grave w/Iota + { 1, F_GSMCIO }, // alpha Smooth Circumflex w/Iota + { 1, F_GROUGH }, // alpha Rough + { 1, F_GRGACT }, // alpha Rough Acute + { 1, F_GRGGRV }, // alpha Rough Grave + { 1, F_GRGCIR }, // alpha Rough Circumflex + { 1, F_GRGIO }, // alpha Rough w/Iota + { 1, F_GRGAIO }, // alpha Rough Acute w/Iota + { 1, F_GRGGVIO }, // alpha Rough Grave w/Iota + { 1, F_GRGCIO }, // alpha Rough Circumflex w/Iota + { 11, F_GGRAVE }, // epsilon Grave + { 11, F_GSMOOTH }, // epsilon Smooth + { 11, F_GSMACT }, // epsilon Smooth Acute + { 11, F_GSMGRV }, // epsilon Smooth Grave + { 11, F_GROUGH }, // epsilon Rough + { 11, F_GRGACT }, // epsilon Rough Acute + { 11, F_GRGGRV }, // epsilon Rough Grave + { 15, F_GGRAVE }, // eta Grave + { 15, F_GCIRCM }, // eta Circumflex + { 15, F_GIOTA }, // eta w/Iota + { 15, F_GACTIO }, // eta Acute w/Iota + { 15, F_GGRVIO }, // eta Grave w/Iota + { 15, F_GCIRIO }, // eta Circumflex w/Iota + { 15, F_GSMOOTH }, // eta Smooth + { 15, F_GSMACT }, // eta Smooth Acute + { 15, F_GSMGRV }, // eta Smooth Grave + { 15, F_GSMCIR }, // eta Smooth Circumflex + { 15, F_GSMIO }, // eta Smooth w/Iota + { 15, F_GSMAIO }, // eta Smooth Acute w/Iota + { 15, F_GSMGVIO }, // eta Smooth Grave w/Iota + { 15, F_GSMCIO }, // eta Smooth Circumflex w/Iota + { 15, F_GROUGH }, // eta Rough + { 15, F_GRGACT }, // eta Rough Acute + { 15, F_GRGGRV }, 
// eta Rough Grave + { 15, F_GRGCIR }, // eta Rough Circumflex + { 15, F_GRGIO }, // eta Rough w/Iota + { 15, F_GRGAIO }, // eta Rough Acute w/Iota + { 15, F_GRGGVIO }, // eta Rough Grave w/Iota + { 15, F_GRGCIO }, // eta Rough Circumflex w/Iota + { 19, F_GGRAVE }, // iota Grave + { 19, F_GCIRCM }, // iota Circumflex + { 19, F_GACTDIA }, // iota Acute Diaeresis + { 19, F_GGRVDIA }, // iota Grave Diaeresis + { 19, F_GSMOOTH }, // iota Smooth + { 19, F_GSMACT }, // iota Smooth Acute + { 19, F_GSMGRV }, // iota Smooth Grave + { 19, F_GSMCIR }, // iota Smooth Circumflex + { 19, F_GROUGH }, // iota Rough + { 19, F_GRGACT }, // iota Rough Acute + { 19, F_GRGGRV }, // iota Rough Grave + { 19, F_GRGCIR }, // iota Rough Circumflex + { 31, F_GGRAVE }, // omicron Grave + { 31, F_GSMOOTH }, // omicron Smooth + { 31, F_GSMACT }, // omicron Smooth Acute + { 31, F_GSMGRV }, // omicron Smooth Grave + { 31, F_GROUGH }, // omicron Rough + { 31, F_GRGACT }, // omicron Rough Acute + { 31, F_GRGGRV }, // omicron Rough Grave + { 0xFF, 0xFF }, // rho rough + { 0xFF, 0xFF }, // rho smooth + { 43, F_GGRAVE }, // upsilon Grave + { 43, F_GCIRCM }, // upsilon Circumflex + { 43, F_GACTDIA }, // upsilon Acute Diaeresis + { 43, F_GGRVDIA }, // upsilon Grave Diaeresis + { 43, F_GSMOOTH }, // upsilon Smooth + { 43, F_GSMACT }, // upsilon Smooth Acute + { 43, F_GSMGRV }, // upsilon Smooth Grave + { 43, F_GSMCIR }, // upsilon Smooth Circumflex + { 43, F_GROUGH }, // upsilon Rough + { 43, F_GRGACT }, // upsilon Rough Acute + { 43, F_GRGGRV }, // upsilon Rough Grave + { 43, F_GRGCIR }, // upsilon Rough Circumflex + { 51, F_GGRAVE }, // omega Grave + { 51, F_GCIRCM }, // omega Circumflex + { 51, F_GIOTA }, // omega w/Iota + { 51, F_GACTIO }, // omega Acute w/Iota + { 51, F_GGRVIO }, // omega Grave w/Iota + { 51, F_GCIRIO }, // omega Circumflex w/Iota + { 51, F_GSMOOTH }, // omega Smooth + { 51, F_GSMACT }, // omega Smooth Acute + { 51, F_GSMGRV }, // omega Smooth Grave + { 51, F_GSMCIR }, // omega 
Smooth Circumflex + { 51, F_GSMIO }, // omega Smooth w/Iota + { 51, F_GSMAIO }, // omega Smooth Acute w/Iota + { 51, F_GSMGVIO }, // omega Smooth Grave w/Iota + { 51, F_GSMCIO }, // omega Smooth Circumflex w/Iota + { 51, F_GROUGH }, // omega Rough + { 51, F_GRGACT }, // omega Rough Acute + { 51, F_GRGGRV }, // omega Rough Grave + { 51, F_GRGCIR }, // omega Rough Circumflex + { 51, F_GRGIO }, // omega Rough w/Iota + { 51, F_GRGAIO }, // omega Rough Acute w/Iota + { 51, F_GRGGVIO }, // omega Rough Grave w/Iota + { 51, F_GRGCIO} // omega Rough Circumflex w/Iota }; /**************************************************************************** @@ -824,126 +697,126 @@ Notes: Diacritical char is always in same set as composed char ****************************************************************************/ static BASE_DIACRIT_TABLE fwp_rus_c_table[] = { - { 14, 204 }, // ZHE with right descender - { 15, 204 }, // zhe with right descender - { 0xFF, 0xFF}, // DZE - { 0xFF, 0xFF}, // dze - { 0xFF, 0xFF}, // Z - { 0xFF, 0xFF}, // z - { 18, 206 }, // II with macron - { 19, 206}, // ii with macron - { 0xFF, 0xFF}, // I - { 0xFF, 0xFF}, // i - { 0xFF, 0xFF}, // YI - { 0xFF, 0xFF}, // yi - { 0xFF, 0xFF}, // I ligature - { 0xFF, 0xFF}, // i ligature - { 0xFF, 0xFF}, // JE - { 0xFF, 0xFF}, // je - { 0xFF, 0xFF}, // KJE - { 0xFF, 0xFF}, // kje - { 22, 204}, // KA with right descender - { 23, 204}, // ka with right descender - { 22, 205 }, // KA ogonek - { 23, 205 }, // ka ogonek - { 0xFF, 0xFF}, // KA vertical bar - { 0xFF, 0xFF}, // ka vertical bar - { 0xFF, 0xFF}, // LJE - { 0xFF, 0xFF}, // lje - { 28, 204 }, // EN with right descender - { 29, 204 }, // en with right descender - { 0xFF, 0xFF}, // NJE - { 0xFF, 0xFF}, // nje - { 0xFF, 0xFF}, // ROUND OMEGA - { 0xFF, 0xFF}, // round omega - { 0xFF, 0xFF}, // OMEGA - { 0xFF, 0xFF}, // omega - { 0xFF, 0xFF}, // TSHE - { 0xFF, 0xFF}, // tshe - { 0xFF, 0xFF}, // SHORT U - { 0xFF, 0xFF}, // short u - { 40, 206}, // U with macron - { 41, 
206 }, // u with macron - { 0xFF, 0xFF}, // STRAIGHT U - { 0xFF, 0xFF}, // straight u - { 0xFF, 0xFF}, // STRAIGHT U BAR - { 0xFF, 0xFF}, // straight u bar - { 0xFF, 0xFF}, // OU ligature - { 0xFF, 0xFF}, // ou ligature - { 44, 204 }, // KHA with right descender - { 45, 204 }, // kha with right descender - { 44, 205 }, // KHA ogonek - { 45, 205 }, // kha ogonek - { 0xFF, 0xFF}, // H - { 0xFF, 0xFF}, // h - { 0xFF, 0xFF}, // OMEGA titlo - { 0xFF, 0xFF}, // omega titlo - { 0xFF, 0xFF}, // DZHE - { 0xFF, 0xFF}, // dzhe - { 48, 204 }, // CHE with right descender - { 49, 204 }, // che with right descender - { 0xFF, 0xFF}, // CHE vertical bar - { 0xFF, 0xFF}, // che vertical bar - { 0xFF, 0xFF}, // SHCHA (variant) - { 0xFF, 0xFF}, // shcha (variant) - { 0xFF, 0xFF}, // YAT - { 0xFF, 0xFF}, // yat - { 0xFF, 0xFF}, // YUS BOLSHOI - { 0xFF, 0xFF}, // yus bolshoi - { 0xFF, 0xFF}, // BIG MALYI - { 0xFF, 0xFF}, // big malyi - { 0xFF, 0xFF}, // KSI - { 0xFF, 0xFF}, // ksi - { 0xFF, 0xFF}, // PSI - { 0xFF, 0xFF}, // psi - { 0xFF, 0xFF}, // FITA - { 0xFF, 0xFF}, // fita - { 0xFF, 0xFF}, // IZHITSA - { 0xFF, 0xFF}, // izhitsa - { 00, racute}, // Russian A acute - { 01, racute }, // Russian a acute - { 10, racute }, // Russian IE acute - { 11, racute }, // Russian ie acute - { 78, racute }, // Russian E acute - { 79, racute }, // Russian e acute - { 18, racute }, // Russian II acute - { 19, racute }, // Russian ii acute - { 88, racute }, // Russian I acute - { 89, racute }, // Russian i acute - { 90, racute }, // Russian YI acute - { 91, racute }, // Russian yi acute - { 30, racute }, // Russian O acute - { 31, racute }, // Russian o acute - { 40, racute }, // Russian U acute - { 41, racute }, // Russian u acute - { 56, racute }, // Russian YERI acute - { 57, racute }, // Russian yeri acute - { 60, racute }, // Russian REVERSED E acute - { 61, racute }, // Russian reversed e acute - { 62, racute }, // Russian IU acute - { 63, racute }, // Russian iu acute - { 64, racute }, // 
Russian IA acute - { 65, racute }, // Russian ia acute - { 00, rgrave }, // Russian A grave - { 01, rgrave }, // Russian a grave - { 10, rgrave }, // Russian IE grave - { 11, rgrave }, // Russian ie grave - { 12, rgrave }, // Russian YO grave - { 13, rgrave }, // Russian yo grave - { 18, rgrave }, // Russian I grave - { 19, rgrave }, // Russian i grave - { 30, rgrave }, // Russian O grave - { 31, rgrave }, // Russian o grave - { 40, rgrave }, // Russian U grave - { 41, rgrave }, // Russian u grave - { 56, rgrave }, // Russian YERI grave - { 57, rgrave }, // Russian yeri grave - { 60, rgrave }, // Russian REVERSED E grave - { 61, rgrave }, // Russian reversed e grave - { 62, rgrave }, // Russian IU grave - { 63, rgrave }, // Russian iu grave - { 64, rgrave }, // Russian IA grave - { 65, rgrave} // Russian ia grave + { 14, 204 }, // ZHE with right descender + { 15, 204 }, // zhe with right descender + { 0xFF, 0xFF }, // DZE + { 0xFF, 0xFF }, // dze + { 0xFF, 0xFF }, // Z + { 0xFF, 0xFF }, // z + { 18, 206 }, // II with macron + { 19, 206 }, // ii with macron + { 0xFF, 0xFF }, // I + { 0xFF, 0xFF }, // i + { 0xFF, 0xFF }, // YI + { 0xFF, 0xFF }, // yi + { 0xFF, 0xFF }, // I ligature + { 0xFF, 0xFF }, // i ligature + { 0xFF, 0xFF }, // JE + { 0xFF, 0xFF }, // je + { 0xFF, 0xFF }, // KJE + { 0xFF, 0xFF }, // kje + { 22, 204 }, // KA with right descender + { 23, 204 }, // ka with right descender + { 22, 205 }, // KA ogonek + { 23, 205 }, // ka ogonek + { 0xFF, 0xFF }, // KA vertical bar + { 0xFF, 0xFF }, // ka vertical bar + { 0xFF, 0xFF }, // LJE + { 0xFF, 0xFF }, // lje + { 28, 204 }, // EN with right descender + { 29, 204 }, // en with right descender + { 0xFF, 0xFF }, // NJE + { 0xFF, 0xFF }, // nje + { 0xFF, 0xFF }, // ROUND OMEGA + { 0xFF, 0xFF }, // round omega + { 0xFF, 0xFF }, // OMEGA + { 0xFF, 0xFF }, // omega + { 0xFF, 0xFF }, // TSHE + { 0xFF, 0xFF }, // tshe + { 0xFF, 0xFF }, // SHORT U + { 0xFF, 0xFF }, // short u + { 40, 206 }, // U with macron + { 41, 
206 }, // u with macron + { 0xFF, 0xFF }, // STRAIGHT U + { 0xFF, 0xFF }, // straight u + { 0xFF, 0xFF }, // STRAIGHT U BAR + { 0xFF, 0xFF }, // straight u bar + { 0xFF, 0xFF }, // OU ligature + { 0xFF, 0xFF }, // ou ligature + { 44, 204 }, // KHA with right descender + { 45, 204 }, // kha with right descender + { 44, 205 }, // KHA ogonek + { 45, 205 }, // kha ogonek + { 0xFF, 0xFF }, // H + { 0xFF, 0xFF }, // h + { 0xFF, 0xFF }, // OMEGA titlo + { 0xFF, 0xFF }, // omega titlo + { 0xFF, 0xFF }, // DZHE + { 0xFF, 0xFF }, // dzhe + { 48, 204 }, // CHE with right descender + { 49, 204 }, // che with right descender + { 0xFF, 0xFF }, // CHE vertical bar + { 0xFF, 0xFF }, // che vertical bar + { 0xFF, 0xFF }, // SHCHA (variant) + { 0xFF, 0xFF }, // shcha (variant) + { 0xFF, 0xFF }, // YAT + { 0xFF, 0xFF }, // yat + { 0xFF, 0xFF }, // YUS BOLSHOI + { 0xFF, 0xFF }, // yus bolshoi + { 0xFF, 0xFF }, // BIG MALYI + { 0xFF, 0xFF }, // big malyi + { 0xFF, 0xFF }, // KSI + { 0xFF, 0xFF }, // ksi + { 0xFF, 0xFF }, // PSI + { 0xFF, 0xFF }, // psi + { 0xFF, 0xFF }, // FITA + { 0xFF, 0xFF }, // fita + { 0xFF, 0xFF }, // IZHITSA + { 0xFF, 0xFF }, // izhitsa + { 00, F_RACUTE }, // Russian A acute + { 01, F_RACUTE }, // Russian a acute + { 10, F_RACUTE }, // Russian IE acute + { 11, F_RACUTE }, // Russian ie acute + { 78, F_RACUTE }, // Russian E acute + { 79, F_RACUTE }, // Russian e acute + { 18, F_RACUTE }, // Russian II acute + { 19, F_RACUTE }, // Russian ii acute + { 88, F_RACUTE }, // Russian I acute + { 89, F_RACUTE }, // Russian i acute + { 90, F_RACUTE }, // Russian YI acute + { 91, F_RACUTE }, // Russian yi acute + { 30, F_RACUTE }, // Russian O acute + { 31, F_RACUTE }, // Russian o acute + { 40, F_RACUTE }, // Russian U acute + { 41, F_RACUTE }, // Russian u acute + { 56, F_RACUTE }, // Russian YERI acute + { 57, F_RACUTE }, // Russian yeri acute + { 60, F_RACUTE }, // Russian REVERSED E acute + { 61, F_RACUTE }, // Russian reversed e acute + { 62, F_RACUTE }, // Russian 
IU acute + { 63, F_RACUTE }, // Russian iu acute + { 64, F_RACUTE }, // Russian IA acute + { 65, F_RACUTE }, // Russian ia acute + { 00, F_RGRAVE }, // Russian A grave + { 01, F_RGRAVE }, // Russian a grave + { 10, F_RGRAVE }, // Russian IE grave + { 11, F_RGRAVE }, // Russian ie grave + { 12, F_RGRAVE }, // Russian YO grave + { 13, F_RGRAVE }, // Russian yo grave + { 18, F_RGRAVE }, // Russian I grave + { 19, F_RGRAVE }, // Russian i grave + { 30, F_RGRAVE }, // Russian O grave + { 31, F_RGRAVE }, // Russian o grave + { 40, F_RGRAVE }, // Russian U grave + { 41, F_RGRAVE }, // Russian u grave + { 56, F_RGRAVE }, // Russian YERI grave + { 57, F_RGRAVE }, // Russian yeri grave + { 60, F_RGRAVE }, // Russian REVERSED E grave + { 61, F_RGRAVE }, // Russian reversed e grave + { 62, F_RGRAVE }, // Russian IU grave + { 63, F_RGRAVE }, // Russian iu grave + { 64, F_RGRAVE }, // Russian IA grave + { 65, F_RGRAVE } // Russian ia grave }; /**************************************************************************** @@ -959,7 +832,7 @@ static BASE_DIACRIT fwp_rus_c = /**************************************************************************** Desc: Table of pointers to character component tables. ****************************************************************************/ -static BASE_DIACRIT * fwp_car60_c[ NCHSETS] = +static BASE_DIACRIT * fwp_car60_c[ F_NCHSETS] = { (BASE_DIACRIT*)0, // no composed characters for ascii. 
&fwp_ml1c, @@ -1302,7 +1175,7 @@ static FLMBYTE MapCS26ToCharSet11[ 86] = /**************************************************************************** Desc: Conversion from single (Hankaku) to double (Zenkaku) wide characters - Used in flmWPHanToZenkaku() + Used in f_wpHanToZenkaku() Maps from charset 11 to CS24 (punctuation) (starting from 11,0) ****************************************************************************/ static FLMBYTE From0AToZen[] = @@ -3010,12 +2883,12 @@ Desc: This table describes and gives addresses for collating 5.0 ***************************************************************************/ static TBL_B_TO_BP fwp_col60Tbl[] = { - {CHSASCI, fwp_asc60Tbl}, // ascii - " " - "~" - {CHSMUL1, fwp_mn60Tbl}, // multinational - {CHSSYM1, fwp_sym60Tbl}, // symbols - {CHSGREK, fwp_grk60Tbl}, // greek - {CHSCYR, fwp_cyrl60Tbl}, // Cyrillic - Russian - {0xFF, 0} // table terminator + {F_CHSASCI, fwp_asc60Tbl}, + {F_CHSMUL1, fwp_mn60Tbl}, + {F_CHSSYM1, fwp_sym60Tbl}, + {F_CHSGREK, fwp_grk60Tbl}, + {F_CHSCYR, fwp_cyrl60Tbl}, + {0xFF, 0} }; /**************************************************************************** @@ -3024,15 +2897,15 @@ Desc: This table is for sorting the hebrew/arabic languages. 
****************************************************************************/ static TBL_B_TO_BP fwp_HebArabicCol60Tbl[] = { - {CHSASCI, fwp_asc60Tbl}, // ascii - " " - "~" - {CHSMUL1, fwp_mn60Tbl}, // multinational - {CHSSYM1, fwp_sym60Tbl}, // symbols - {CHSGREK, fwp_grk60Tbl}, // greek - {CHSHEB, fwp_heb60TblA}, // Hebrew - {CHSHEB, fwp_heb60TblB}, // Hebrew - {CHSARB1, fwp_ar160Tbl}, // Arabic Set 1 - {CHSARB2, fwp_ar260Tbl}, // Arabic Set 2 - {0xff, 0} // table terminator + {F_CHSASCI, fwp_asc60Tbl}, + {F_CHSMUL1, fwp_mn60Tbl}, + {F_CHSSYM1, fwp_sym60Tbl}, + {F_CHSGREK, fwp_grk60Tbl}, + {F_CHSHEB, fwp_heb60TblA}, + {F_CHSHEB, fwp_heb60TblB}, + {F_CHSARB1, fwp_ar160Tbl}, + {F_CHSARB2, fwp_ar260Tbl}, + {0xff, 0} }; /**************************************************************************** @@ -5192,11 +5065,11 @@ FINLINE FLMBOOL charIsUpper( ui16Char <= ASCII_LOWER_Z) ? (FLMBOOL)FALSE : (FLMBOOL)TRUE) - : flmWPIsUpper( ui16Char))); + : f_wpIsUpper( ui16Char))); } /**************************************************************************** -Desc: getNextCharState can be thought of as a 2 dimentional array with +Desc: flmGetNextCharState can be thought of as a 2 dimensional array with i and j as the row and column indicators respectively. If a value exists at the intersection of i and j, it is returned. Sparse array techniques are used to minimize memory usage. @@ -5204,18 +5077,17 @@ Desc: getNextCharState can be thought of as a 2 dimentional array with Return: 0 = no valid next state non-zero = valid next state, offset for action, or collating value ****************************************************************************/ -FINLINE FLMUINT16 getNextCharState( +FINLINE FLMUINT16 flmGetNextCharState( FLMUINT i, FLMUINT j) { - FLMUINT k, x; + FLMUINT k; + FLMUINT x; - for( k = fwp_indexi[ x = - (i > START_COL) ? (START_ALL) : i ]; // adjust so don't use full tables + for( k = fwp_indexi[ x = (i > START_COL) ?
(START_ALL) : i]; k <= (FLMUINT) (fwp_indexi[ x + 1] - 1); k++ ) { - // FIXUP_AREA_SIZE should be 24. if( j == fwp_indexj[ k]) { return( fwp_valuea[ (i > START_COL) @@ -5224,7 +5096,7 @@ FINLINE FLMUINT16 getNextCharState( } } - return(0); + return( 0); } /**************************************************************************** @@ -5257,6 +5129,37 @@ FLMBOOL FLMAPI f_unicodeToWP( return( FALSE); } +/**************************************************************************** +Desc: Convert a Unicode character to its WP equivalent using the + deprecated FLAIM conversion rules +Ret: Returns TRUE if the character could be converted +****************************************************************************/ +FLMBOOL FLMAPI f_depricatedUnicodeToWP( + FLMUNICODE uUniChar, // Unicode character to convert + FLMUINT16 * pui16WPChar) // Returns 0 or WPChar converted. +{ + if( uUniChar < 127) + { + *pui16WPChar = uUniChar; + return( TRUE); + } + + if( uUniChar < gv_uiMinUniChar || + uUniChar > gv_uiMaxUniChar || + uUniChar > 0x222E) + { + *pui16WPChar = 0; + return( FALSE); + } + + if( (*pui16WPChar = gv_pUnicodeToWP60[ uUniChar - gv_uiMinUniChar]) != 0) + { + return( TRUE); + } + + return( FALSE); +} + /**************************************************************************** Desc: Convert a WP character to its Unicode equivalent ****************************************************************************/ @@ -5274,10 +5177,15 @@ RCODE FLMAPI f_wpToUnicode( if( ui16WPChar < gv_uiMinWPChar || ui16WPChar > gv_uiMaxWPChar) { - return( RC_SET_AND_ASSERT( NE_FLM_CONV_ILLEGAL)); + *puUniChar = 0; + return( RC_SET( NE_FLM_CONV_ILLEGAL)); + } + + if( (*puUniChar = gv_pWP60ToUnicode[ ui16WPChar - gv_uiMinWPChar]) == 0) + { + return( RC_SET( NE_FLM_CONV_ILLEGAL)); } - *puUniChar = gv_pWP60ToUnicode[ ui16WPChar - gv_uiMinWPChar]; return( NE_FLM_OK); } @@ -5519,7 +5427,7 @@ Exit: /**************************************************************************** Desc: Converts a
character to upper case (if possible) ****************************************************************************/ -FLMUINT16 flmWPUpper( +FLMUINT16 FLMAPI f_wpUpper( FLMUINT16 ui16WpChar) { if( ui16WpChar < 256) @@ -5535,28 +5443,28 @@ FLMUINT16 flmWPUpper( { FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChar >> 8); - if( ucCharSet == CHSMUL1) + if( ucCharSet == F_CHSMUL1) { FLMBYTE ucChar = (FLMBYTE)(ui16WpChar & 0xFF); - if( ucChar >= fwp_caseConvertableRange[ (CHSMUL1-1) * 2] && - ucChar <= fwp_caseConvertableRange[ ((CHSMUL1-1) * 2) + 1]) + if( ucChar >= fwp_caseConvertableRange[ (F_CHSMUL1 - 1) * 2] && + ucChar <= fwp_caseConvertableRange[ ((F_CHSMUL1 - 1) * 2) + 1]) { return( ui16WpChar & 0xFFFE); } } - else if( ucCharSet == CHSGREK) + else if( ucCharSet == F_CHSGREK) { if( (ui16WpChar & 0xFF) <= - fwp_caseConvertableRange[ ((CHSGREK-1) * 2) + 1]) + fwp_caseConvertableRange[ ((F_CHSGREK - 1) * 2) + 1]) { return( ui16WpChar & 0xFFFE); } } - else if( ucCharSet == CHSCYR) + else if( ucCharSet == F_CHSCYR) { if( (ui16WpChar & 0xFF) <= - fwp_caseConvertableRange[ ((CHSCYR-1) * 2) + 1]) + fwp_caseConvertableRange[ ((F_CHSCYR - 1) * 2) + 1]) { return( ui16WpChar & 0xFFFE); } @@ -5600,7 +5508,7 @@ FLMUINT16 flmWPUpper( /**************************************************************************** Desc: Checks to see if WP character is upper case ****************************************************************************/ -FLMBOOL flmWPIsUpper( +FLMBOOL FLMAPI f_wpIsUpper( FLMUINT16 ui16WpChar) { FLMBYTE ucChar; @@ -5623,13 +5531,9 @@ FLMBOOL flmWPIsUpper( ucCharSet = (FLMBYTE) (ui16WpChar >> 8); - // CHSMUL1 == Multinational 1 character set - // CHSGREK == Greek character set - // CHSCYR == Cyrillic character set - - if( (ucCharSet == CHSMUL1 && ucChar >= 26 && ucChar <= 241) || - (ucCharSet == CHSGREK && ucChar <= 69) || - (ucCharSet == CHSCYR && ucChar <= 199)) + if( (ucCharSet == F_CHSMUL1 && ucChar >= 26 && ucChar <= 241) || + (ucCharSet == F_CHSGREK && ucChar <= 69) || 
+ (ucCharSet == F_CHSCYR && ucChar <= 199)) { return( (ucChar & 1) ? FALSE : TRUE); } @@ -5642,7 +5546,7 @@ FLMBOOL flmWPIsUpper( /**************************************************************************** Desc: Converts a character to lower case (if possible) ****************************************************************************/ -FLMUINT16 flmWPLower( +FLMUINT16 FLMAPI f_wpLower( FLMUINT16 ui16WpChar) { if( ui16WpChar < 256) @@ -5656,28 +5560,28 @@ FLMUINT16 flmWPLower( { FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChar >> 8); - if( ucCharSet == CHSMUL1) + if( ucCharSet == F_CHSMUL1) { FLMBYTE ucChar = (FLMBYTE)(ui16WpChar & 0xFF); - if( ucChar >= fwp_caseConvertableRange[ (CHSMUL1-1) * 2] && - ucChar <= fwp_caseConvertableRange[ ((CHSMUL1-1) * 2) + 1] ) + if( ucChar >= fwp_caseConvertableRange[ (F_CHSMUL1 - 1) * 2] && + ucChar <= fwp_caseConvertableRange[ ((F_CHSMUL1 - 1) * 2) + 1] ) { return( ui16WpChar | 1); } } - else if( ucCharSet == CHSGREK) + else if( ucCharSet == F_CHSGREK) { if( (ui16WpChar & 0xFF) <= - fwp_caseConvertableRange[ ((CHSGREK-1) * 2) + 1]) + fwp_caseConvertableRange[ ((F_CHSGREK - 1) * 2) + 1]) { return( ui16WpChar | 1); } } - else if( ucCharSet == CHSCYR) + else if( ucCharSet == F_CHSCYR) { if( (ui16WpChar & 0xFF) <= - fwp_caseConvertableRange[ ((CHSCYR-1) * 2) + 1]) + fwp_caseConvertableRange[ ((F_CHSCYR-1) * 2) + 1]) { return( ui16WpChar | 1); } @@ -5720,10 +5624,8 @@ FLMUINT16 flmWPLower( /**************************************************************************** Desc: Break a WP character into a base and a diacritical char. 
-Ret: TRUE - if not found - FALSE - if found ****************************************************************************/ -FLMBOOL flmWPBrkcar( +FLMBOOL FLMAPI f_breakWPChar( FLMUINT16 ui16WpChar, FLMUINT16 * pui16BaseChar, FLMUINT16 * pui16DiacriticChar) @@ -5731,12 +5633,14 @@ FLMBOOL flmWPBrkcar( BASE_DIACRIT * pBaseDiacritic; FLMINT iTableIndex; - if( (pBaseDiacritic = fwp_car60_c[ HI(ui16WpChar)]) == 0) + if( HI(ui16WpChar) >= F_NCHSETS || + (pBaseDiacritic = fwp_car60_c[ HI(ui16WpChar)]) == 0) { return( TRUE); } iTableIndex = ((FLMBYTE)ui16WpChar) - pBaseDiacritic->start_char; + if( iTableIndex < 0 || iTableIndex > pBaseDiacritic->char_count || pBaseDiacritic->table [iTableIndex].base == (FLMBYTE)0xFF) @@ -5744,7 +5648,7 @@ FLMBOOL flmWPBrkcar( return( TRUE); } - if( (HI( ui16WpChar) != CHSMUL1) || + if( (HI( ui16WpChar) != F_CHSMUL1) || ((fwp_ml1_cb60[ ((FLMBYTE) ui16WpChar) >> 3] >> (7 - (ui16WpChar & 0x07))) & 0x01)) { @@ -5769,45 +5673,109 @@ FLMBOOL flmWPBrkcar( return( FALSE); } +/**************************************************************************** +Desc: Take a base and a diacritic and compose a WP character. + Note on base character: i's and j's must be dotless i's and j's (for + those which use them) or they will not be found. +Ret: TRUE - if not found + FALSE - if found +Notes: ascii characters with diacriticals are in multi-national if anywhere; + all other base chars with diacritics are found in their own sets. +****************************************************************************/ +FLMBOOL FLMAPI f_combineWPChar( + FLMUINT16 * pui16WpChar, + FLMUINT16 ui16BaseChar, + FLMINT16 ui16DiacriticChar) +{ + FLMUINT uiRemaining; + FLMBYTE ucCharSet; + FLMBYTE ucChar; + BASE_DIACRIT * pBaseDiacritic; + BASE_DIACRIT_TABLE * pTable; + + ucCharSet = HI( ui16BaseChar); + if( ucCharSet > F_NCHSETS) + { + return( TRUE); + } + + // Is base ASCII? 
If so, look in multinational 1 + + if( !ucCharSet) + { + ucCharSet = F_CHSMUL1; + } + + if( ucCharSet >= F_NCHSETS || + (pBaseDiacritic = fwp_car60_c[ ucCharSet]) == 0) + { + return( TRUE); + } + + ucChar = LO( ui16BaseChar); + ui16DiacriticChar = LO( ui16DiacriticChar); + pTable = pBaseDiacritic->table; + + for( uiRemaining = pBaseDiacritic->char_count; + uiRemaining; + uiRemaining--, pTable++ ) + { + // Same base? + + if( pTable->base == ucChar && + (pTable->diacrit & 0x7F) == ui16DiacriticChar) + { + // Same diacritic? + + *pui16WpChar = (FLMUINT16) (((FLMUINT16) ucCharSet << 8) + + (pBaseDiacritic->start_char + + (FLMUINT16)(pTable - pBaseDiacritic->table))); + return( FALSE); + } + } + + return( TRUE); +} + /************************************************************************** Desc: Find the collating value of a WP character ret: Collating value (COLS0 is high value - undefined WP char) ***********************************************************************/ -FLMUINT16 flmWPGetCollation( - FLMUINT16 ui16WpChar, - FLMUINT uiLanguage) +FLMUINT16 FLMAPI f_wpGetCollationImp( + FLMUINT16 ui16WpChar, + FLMUINT uiLanguage) { FLMUINT16 ui16State; FLMBYTE ucCharVal; FLMBYTE ucCharSet; - FLMBOOL bHebrewArabicFlag = FALSE; - TBL_B_TO_BP * pColTbl = fwp_col60Tbl; + FLMBOOL bHebrewArabicFlag; + TBL_B_TO_BP * pColTbl; - // State ONLY for non-US - - if( uiLanguage != FLM_US_LANG) + if( uiLanguage == FLM_US_LANG) { - if( uiLanguage == FLM_AR_LANG || // Arabic - uiLanguage == FLM_FA_LANG || // Farsi - persian - uiLanguage == FLM_HE_LANG || // Hebrew - uiLanguage == FLM_UR_LANG) // Urdu - { - pColTbl = fwp_HebArabicCol60Tbl; - bHebrewArabicFlag = TRUE; - } - else - { - // check if uiLanguage candidate for alternate double collating + return( gv_pui16USCollationTable[ ui16WpChar]); + } + else if( uiLanguage == FLM_AR_LANG || uiLanguage == FLM_FA_LANG || + uiLanguage == FLM_HE_LANG || uiLanguage == FLM_UR_LANG) + { + pColTbl = fwp_HebArabicCol60Tbl; + bHebrewArabicFlag = 
TRUE; + } + else + { + // Check if uiLanguage candidate for alternate double collating - ui16State = getNextCharState( START_COL, uiLanguage); - if( 0 != (ui16State = getNextCharState( (ui16State - ? ui16State // look at special case languages - : START_ALL), // look at US and European - (FLMUINT) ui16WpChar))) - { - return( ui16State); - } + ui16State = flmGetNextCharState( START_COL, uiLanguage); + if( 0 != (ui16State = flmGetNextCharState( (ui16State + ? ui16State // look at special case languages + : START_ALL), // look at US and European + (FLMUINT) ui16WpChar))) + { + return( ui16State); } + + pColTbl = fwp_col60Tbl; + bHebrewArabicFlag = FALSE; } ucCharVal = (FLMBYTE)ui16WpChar; @@ -5817,14 +5785,12 @@ FLMUINT16 flmWPGetCollation( { if( pColTbl->key == ucCharSet) { - FLMBYTE * pucColVals; // table of collating values - - pucColVals = pColTbl->charPtr; + FLMBYTE * pucColVals = pColTbl->charPtr; // Check if the value is in the range of collated chars // Above lower range of table? - if (ucCharVal >= *pucColVals) + if( ucCharVal >= *pucColVals) { // Make value zero based to index @@ -5844,16 +5810,14 @@ FLMUINT16 flmWPGetCollation( // Go to next table entry pColTbl++; + } while( pColTbl->key != 0xFF); if( bHebrewArabicFlag) { - if( ucCharSet == CHSHEB || - ucCharSet == CHSARB1 || - ucCharSet == CHSARB2) + if( ucCharSet == F_CHSHEB || ucCharSet == F_CHSARB1 || + ucCharSet == F_CHSARB2) { - // Same as COLS0_HEBREW - return( COLS0_ARABIC); } } @@ -5880,22 +5844,22 @@ RCODE FLMAPI f_wpCheckDoubleCollation( FLMBOOL * pbTwoIntoOne, FLMUINT uiLanguage) { - RCODE rc = NE_FLM_OK; - FLMUINT16 ui16CurState; - FLMUINT16 ui16WpChar; - FLMUNICODE uzLastChar = 0; - FLMUNICODE uChar = *puzChar; - FLMUNICODE uDummy; - FLMBOOL bUpperFlag; - FLMUINT64 ui64SavePosition = pIStream->getCurrPosition(); + RCODE rc = NE_FLM_OK; + FLMUINT16 ui16CurState; + FLMUINT16 ui16WpChar; + FLMUNICODE uzLastChar = 0; + FLMUNICODE uChar = *puzChar; + FLMUNICODE uDummy; + FLMBOOL bUpperFlag; + 
FLMUINT64 ui64SavePosition = pIStream->getCurrPosition(); if (!f_unicodeToWP( *puzChar, &ui16WpChar)) { ui16WpChar = UNK_UNICODE_CODE; } - bUpperFlag = flmWPIsUpper( ui16WpChar); + bUpperFlag = f_wpIsUpper( ui16WpChar); - if ((ui16CurState = getNextCharState( 0, uiLanguage)) == 0) + if ((ui16CurState = flmGetNextCharState( 0, uiLanguage)) == 0) { *pbTwoIntoOne = FALSE; *puzChar2 = 0; @@ -5907,10 +5871,14 @@ RCODE FLMAPI f_wpCheckDoubleCollation( switch (ui16CurState) { case INSTSG: + { *puzChar = *puzChar2 = (FLMUNICODE)f_toascii( 's'); *pbTwoIntoOne = FALSE; goto Exit; + } + case INSTAE: + { if (bUpperFlag) { *puzChar = (FLMUNICODE)f_toascii( 'A'); @@ -5923,7 +5891,10 @@ RCODE FLMAPI f_wpCheckDoubleCollation( } *pbTwoIntoOne = FALSE; goto Exit; + } + case INSTIJ: + { if (bUpperFlag) { *puzChar = (FLMUNICODE)f_toascii( 'I'); @@ -5936,7 +5907,10 @@ RCODE FLMAPI f_wpCheckDoubleCollation( } *pbTwoIntoOne = FALSE; goto Exit; + } + case INSTOE: + { if (bUpperFlag) { *puzChar = (FLMUNICODE)f_toascii( 'O'); @@ -5949,7 +5923,10 @@ RCODE FLMAPI f_wpCheckDoubleCollation( } *pbTwoIntoOne = FALSE; goto Exit; + } + case WITHAA: + { *puzChar = (FLMUNICODE)(bUpperFlag ? (FLMUNICODE)0xC5 : (FLMUNICODE)0xE5); @@ -5982,7 +5959,10 @@ RCODE FLMAPI f_wpCheckDoubleCollation( ui64SavePosition = pIStream->getCurrPosition(); break; + } + case AFTERC: + { *puzChar = (FLMUINT16)(bUpperFlag ? (FLMUNICODE)f_toascii( 'C') : (FLMUNICODE)f_toascii( 'c')); @@ -6022,23 +6002,33 @@ Position_After_2nd: ui64SavePosition = pIStream->getCurrPosition(); } goto Exit; + } + case AFTERH: + { *puzChar = (FLMUINT16)(bUpperFlag ? (FLMUNICODE)f_toascii( 'H') : (FLMUNICODE)f_toascii( 'h')); goto Position_After_2nd; + } + case AFTERL: + { *puzChar = (FLMUINT16)(bUpperFlag ? 
(FLMUNICODE)f_toascii( 'L') : (FLMUNICODE)f_toascii( 'l')); goto Position_After_2nd; + } + default: + { // Handles STATE1 through STATE11 also break; + } } - if ((ui16CurState = getNextCharState( ui16CurState, - flmWPLower( ui16WpChar))) == 0) + if ((ui16CurState = flmGetNextCharState( ui16CurState, + f_wpLower( ui16WpChar))) == 0) { break; } @@ -6082,6 +6072,169 @@ Exit: return( rc); } +/**************************************************************************** +Desc: Check for double characters that sort as 1 (like ch in Spanish) or + 1 character that should sort as 2 (like æ sorts as ae in French). +Return: 0 = nothing changes. Otherwise, *pui16WpChar is the first + character, and the return value contains the 2nd character. + In addition, *pbTwoIntoOne will be TRUE if we should take two + characters and treat as one (i.e., change the collation on the + outside to one more than the collation of the first character). +****************************************************************************/ +FLMUINT16 FLMAPI f_wpCheckDoubleCollation( + FLMUINT16 * pui16WpChar, + FLMBOOL * pbTwoIntoOne, + const FLMBYTE ** ppucInputStr, + FLMUINT uiLanguage) +{ + FLMUINT16 ui16CurState; + FLMUINT16 ui16WpChar; + FLMUINT16 ui16SecondChar; + FLMUINT16 ui16LastChar = 0; + FLMUINT uiInLen; + FLMBOOL bUpperFlag; + + ui16WpChar = *pui16WpChar; + bUpperFlag = f_wpIsUpper( ui16WpChar); + + uiInLen = 0; + ui16SecondChar = 0; + + // Primer read + + if ((ui16CurState = flmGetNextCharState( 0, uiLanguage)) == 0) + { + goto Exit; + } + + for (;;) + { + switch (ui16CurState) + { + case INSTSG: + { + *pui16WpChar = ui16SecondChar = (FLMUINT16) f_toascii( 's'); + *pbTwoIntoOne = FALSE; + goto Exit; + } + + case INSTAE: + { + if (bUpperFlag) + { + *pui16WpChar = (FLMUINT16) f_toascii( 'A'); + ui16SecondChar = (FLMUINT16) f_toascii( 'E'); + } + else + { + *pui16WpChar = (FLMUINT16) f_toascii( 'a'); + ui16SecondChar = (FLMUINT16) f_toascii( 'e'); + } + + *pbTwoIntoOne = FALSE; + goto Exit; + } +
+ case INSTIJ: + { + if (bUpperFlag) + { + *pui16WpChar = (FLMUINT16) f_toascii( 'I'); + ui16SecondChar = (FLMUINT16) f_toascii( 'J'); + } + else + { + *pui16WpChar = (FLMUINT16) f_toascii( 'i'); + ui16SecondChar = (FLMUINT16) f_toascii( 'j'); + } + + *pbTwoIntoOne = FALSE; + goto Exit; + } + + case INSTOE: + { + if (bUpperFlag) + { + *pui16WpChar = (FLMUINT16) f_toascii( 'O'); + ui16SecondChar = (FLMUINT16) f_toascii( 'E'); + } + else + { + *pui16WpChar = (FLMUINT16) f_toascii( 'o'); + ui16SecondChar = (FLMUINT16) f_toascii( 'e'); + } + + *pbTwoIntoOne = FALSE; + goto Exit; + } + + case WITHAA: + { + *pui16WpChar = (FLMUINT16) (bUpperFlag + ? (FLMUINT16) 0x122 + : (FLMUINT16) 0x123); + (*ppucInputStr)++; + break; + } + + case AFTERC: + { + *pui16WpChar = (FLMUINT16) (bUpperFlag + ? (FLMUINT16) f_toascii( 'C') + : (FLMUINT16) f_toascii( 'c')); + ui16SecondChar = ui16LastChar; + *pbTwoIntoOne = TRUE; + (*ppucInputStr)++; + goto Exit; + } + + case AFTERH: + { + *pui16WpChar = (FLMUINT16) (bUpperFlag + ? (FLMUINT16) f_toascii( 'H') + : (FLMUINT16) f_toascii( 'h')); + ui16SecondChar = ui16LastChar; + *pbTwoIntoOne = TRUE; + (*ppucInputStr)++; + goto Exit; + } + + case AFTERL: + { + *pui16WpChar = (FLMUINT16) (bUpperFlag + ? (FLMUINT16) f_toascii( 'L') + : (FLMUINT16) f_toascii( 'l')); + ui16SecondChar = ui16LastChar; + *pbTwoIntoOne = TRUE; + (*ppucInputStr)++; + goto Exit; + } + + default: + { + + // Handles STATE1 through STATE11 also + + break; + } + } + + if ((ui16CurState = flmGetNextCharState( ui16CurState, + f_wpLower( ui16WpChar))) == 0) + { + goto Exit; + } + + ui16LastChar = ui16WpChar; + ui16WpChar = (FLMUINT16) * ((*ppucInputStr) + (uiInLen++)); + } + +Exit: + + return (ui16SecondChar); +} + /**************************************************************************** Desc: Returns the collation value of the input WP character. If in charset 11 will convert the character to Zenkaku (double wide). 
@@ -6119,22 +6272,22 @@ Terms: ****************************************************************************/ FLMUINT16 flmWPAsiaGetCollation( - FLMUINT16 ui16WpChar, // WP char to get collation values - FLMUINT16 ui16NextWpChar, // Next WP char - for CS11 voicing marks - FLMUINT16 ui16PrevColValue, // Previous collating value - FLMUINT16 * pui16ColValue, // Returns collation value - FLMUINT16 * pui16SubColVal, // Returns sub-collation value - FLMBYTE * pucCaseBits, // Returns case bits value - FLMBOOL bUppercaseFlag) // Set if to convert to uppercase + FLMUINT16 ui16WpChar, // WP char to get collation values + FLMUINT16 ui16NextWpChar, // Next WP char - for CS11 voicing marks + FLMUINT16 ui16PrevColValue, // Previous collating value + FLMUINT16 * pui16ColValue, // Returns collation value + FLMUINT16 * pui16SubColVal, // Returns sub-collation value + FLMBYTE * pucCaseBits, // Returns case bits value + FLMBOOL bUppercaseFlag) // Set if to convert to uppercase { - FLMUINT16 ui16ColValue; - FLMUINT16 ui16SubColVal; - FLMBYTE ucCaseBits = 0; - FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChar >> 8); - FLMBYTE ucCharVal = (FLMBYTE)(ui16WpChar & 0xFF); - FLMUINT16 ui16Hankaku; - FLMUINT uiLoop; - FLMUINT16 ui16ReturnValue = 1; + FLMUINT16 ui16ColValue; + FLMUINT16 ui16SubColVal; + FLMBYTE ucCaseBits = 0; + FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChar >> 8); + FLMBYTE ucCharVal = (FLMBYTE)(ui16WpChar & 0xFF); + FLMUINT16 ui16Hankaku; + FLMUINT uiLoop; + FLMUINT16 ui16ReturnValue = 1; ui16ColValue = ui16SubColVal = 0; @@ -6165,9 +6318,9 @@ Latin_Greek_Cyrillic: // YES: Pass FLM_US_LANG because this is what we want - // Prevents double character sorting. 
- ui16ColValue = flmWPGetCollation( ui16WpChar, FLM_US_LANG); + ui16ColValue = f_wpGetCollation( ui16WpChar, FLM_US_LANG); - if (bUppercaseFlag || flmWPIsUpper( ui16WpChar)) + if (bUppercaseFlag || f_wpIsUpper( ui16WpChar)) { // Uppercase - set case bit @@ -6179,7 +6332,7 @@ Latin_Greek_Cyrillic: if( ui16ColValue == COLS0) { ui16ReturnValue = 0; - if( !flmWPIsUpper( ui16WpChar)) + if( !f_wpIsUpper( ui16WpChar)) { // Convert to uppercase @@ -6190,24 +6343,24 @@ Latin_Greek_Cyrillic: } else if( ucCharSet) // Don't bother with ascii { - if( !flmWPIsUpper( ui16WpChar)) + if( !f_wpIsUpper( ui16WpChar)) { // Convert to uppercase ui16WpChar--; } - if( ucCharSet == CHSMUL1) + if( ucCharSet == F_CHSMUL1) { FLMUINT16 ui16Base; FLMUINT16 ui16Diacritic; - ui16SubColVal = !flmWPBrkcar( ui16WpChar, &ui16Base, + ui16SubColVal = !f_breakWPChar( ui16WpChar, &ui16Base, &ui16Diacritic) ? fwp_dia60Tbl[ ui16Diacritic & 0xFF] : ui16WpChar; } - else if( ucCharSet == CHSGREK) // GREEK + else if( ucCharSet == F_CHSGREK) { if( ui16WpChar >= 0x834 || // [8,52] or above ui16WpChar == 0x804 || // [8,4] BETA Medial | Terminal @@ -6216,7 +6369,7 @@ Latin_Greek_Cyrillic: ui16SubColVal = ui16WpChar; } } - else if( ucCharSet == CHSCYR) // CYRILLIC + else if( ucCharSet == F_CHSCYR) { if( ui16WpChar >= 0xA90) // [10, 144] or above { @@ -6238,7 +6391,7 @@ Latin_Greek_Cyrillic: // All characters in charset 11 will convert to CS24 or CS26. // when combining the collation and the sub-collation values. - if( flmWPHanToZenkaku( ui16WpChar, + if( f_wpHanToZenkaku( ui16WpChar, ui16NextWpChar, &ui16KanaChar ) == 2) { // Return 2 @@ -6294,7 +6447,7 @@ Latin_Greek_Cyrillic: // combined with the sub-collation value will format a character in // CS24. The width bit will then convert back to CS11. 
- if( (ui16Hankaku = flmWPZenToHankaku( ui16WpChar, NULL)) != 0) + if( (ui16Hankaku = f_wpZenToHankaku( ui16WpChar, NULL)) != 0) { if( (ui16Hankaku >> 8) != 11) // if CharSet11 was a CS24 symbol { @@ -6376,7 +6529,7 @@ Desc: Convert a zenkaku (double wide) char to a hankaku (single wide) char Ret: Hankaku char or 0 if a conversion doesn't exist Notes: Taken from CHAR.ASM - zen2han_f routine ****************************************************************************/ -FSTATIC FLMUINT16 flmWPZenToHankaku( +FLMUINT16 FLMAPI f_wpZenToHankaku( FLMUINT16 ui16WpChar, FLMUINT16 * pui16DakutenOrHandakuten) { @@ -6397,12 +6550,13 @@ FSTATIC FLMUINT16 flmWPZenToHankaku( { // List is sorted so table entry is more you are done - if( Zen24ToHankaku [uiLoop].ByteValue >= ucCharVal) + if( Zen24ToHankaku[ uiLoop].ByteValue >= ucCharVal) { - if( Zen24ToHankaku [uiLoop].ByteValue == ucCharVal) + if( Zen24ToHankaku[ uiLoop].ByteValue == ucCharVal) { - ui16Hankaku = Zen24ToHankaku [uiLoop].WordValue; + ui16Hankaku = Zen24ToHankaku[ uiLoop].WordValue; } + break; } } @@ -6528,7 +6682,7 @@ Ret: 0 = no conversion Notes: Taken from char.asm - han2zen() From8ToZen could be taken out and placed in code. ****************************************************************************/ -FSTATIC FLMUINT16 flmWPHanToZenkaku( +FLMUINT16 FLMAPI f_wpHanToZenkaku( FLMUINT16 ui16WpChar, FLMUINT16 ui16NextWpChar, FLMUINT16 * pui16Zenkaku) @@ -6624,7 +6778,7 @@ FSTATIC FLMUINT16 flmWPHanToZenkaku( { if( ucCharVal < 5) { - ui16Zenkaku = 0x2400 + From11AToZen[ ucCharVal ]; + ui16Zenkaku = 0x2400 + From11AToZen[ ucCharVal]; } else if( ucCharVal < 0x3D) // katakana? { @@ -6636,7 +6790,7 @@ FSTATIC FLMUINT16 flmWPHanToZenkaku( } else { - if( ui16NextWpChar == 0xB3D) // dakuten? - voicing + if( ui16NextWpChar == 0xB3D) { // First check exception(s) then // check if voicing exists! 
- will NOT access out of table @@ -6688,11 +6842,10 @@ FSTATIC FLMUINT16 flmWPHanToZenkaku( default: { - // Instead of includes more tables from char.asm - look down the - // Zen24Tohankaku[] table for a matching value - not much slower. + // Look in the Zen24Tohankaku table for a matching value for( uiLoop = 0; - uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL)); + uiLoop < (sizeof( Zen24ToHankaku) / sizeof( BYTE_WORD_TBL)); uiLoop++) { if( Zen24ToHankaku[ uiLoop].WordValue == ui16WpChar) @@ -6732,9 +6885,6 @@ FLMUINT FLMAPI f_languageToNum( if( f_langtbl [uiTablePos] == ucFirstChar && f_langtbl [uiTablePos+1] == ucSecondChar) { - - // Return uiTablePos div 2 - return( uiTablePos >> 1); } } @@ -6800,22 +6950,22 @@ FLMUINT16 flmWPGetSubCol( // This just happens to work with all WP character values. - if (!flmWPIsUpper( ui16WPValue)) + if (!f_wpIsUpper( ui16WPValue)) { ui16WPValue &= ~1; } switch (ucCharSet) { - case CHSMUL1: - + case F_CHSMUL1: + { // If you cannot break down a char into base and // diacritic then you cannot combine the charaacter // later when converting back the key. So, write // the entire WP char in the sub-collation area. // We can ONLY SUPPORT MULTINATIONAL 1 for brkcar() - if (flmWPBrkcar( ui16WPValue, &ui16Base, &ui16SubColVal)) + if (f_breakWPChar( ui16WPValue, &ui16Base, &ui16SubColVal)) { // WordPerfect character cannot be broken down. @@ -6836,22 +6986,21 @@ FLMUINT16 flmWPGetSubCol( // Bug 11/16/92 = was only writing a "1" and not "10" ui16SubColVal = ( - (ui16SubColVal & 0xFF) == umlaut + (ui16SubColVal & 0xFF) == F_UMLAUT && ( (uiLanguage == FLM_SU_LANG) || (uiLanguage == FLM_SV_LANG) || (uiLanguage == FLM_CZ_LANG) || (uiLanguage == FLM_SL_LANG) ) ) - ? (FLMUINT16)(fwp_dia60Tbl[ ring] + 1) // umlaut must be after ring above + ? 
(FLMUINT16)(fwp_dia60Tbl[ F_RING] + 1) // umlaut must be after ring above : (FLMUINT16)(fwp_dia60Tbl[ ui16SubColVal & 0xFF]); break; + } - case CHSGREK: - - // Greek - + case F_CHSGREK: + { if( (ucCharVal >= 52) || // Keep case bit for 52-69 else ignore (ui16WPValue == 0x804) || // [ 8,4] BETA Medial | Terminal (ui16WPValue == 0x826)) // [ 8,38] SIGMA termainal @@ -6860,18 +7009,20 @@ FLMUINT16 flmWPGetSubCol( } // else no subcollation to worry about break; + } - case CHSCYR: + case F_CHSCYR: + { if (ucCharVal >= 144) { ui16SubColVal = ui16WPValue; } - // else no subcollation to worry about - // VISIT: Georgian covers 208-249 - no collation defined yet break; + } - case CHSHEB: // Hebrew + case F_CHSHEB: + { // Three sections in Hebrew: // 0..26 - main characters @@ -6888,8 +7039,10 @@ FLMUINT16 flmWPGetSubCol( ui16SubColVal = ui16WPValue; } break; + } - case CHSARB1: // Arabic 1 + case F_CHSARB1: // Arabic 1 + { // Three sections in Arabic: // 00..37 - accents that display OVER a previous character @@ -6926,8 +7079,10 @@ FLMUINT16 flmWPGetSubCol( } } break; + } - case CHSARB2: // Arabic 2 + case F_CHSARB2: // Arabic 2 + { // There are some characters that share the same slot // Check the bit table if above character 64 @@ -6938,7 +7093,7 @@ FLMUINT16 flmWPGetSubCol( ui16SubColVal = ui16WPValue; } break; - + } } Exit: @@ -7054,6 +7209,7 @@ GetNextChar: ui64AfterLastSpacePos = m_pIStream->getCurrPosition(); } + goto GetNextChar; } } @@ -7188,13 +7344,13 @@ Process_Char: { if (!bAsian) { - ui16Col = flmWPGetCollation( ui16WpChar, m_uiLanguage); + ui16Col = f_wpGetCollation( ui16WpChar, m_uiLanguage); if (bTwoIntoOne) { // Since two characters were merged into one, increment // the collation value by one. In the case of something // like 'ch', there is a collation value between 'c' and - // 'd'. flmWPGetCollation would have returned the + // 'd'. f_wpGetCollation would have returned the // collation value for 'c' ... 
incrementing by one gives // us the proper collation value for 'ch' (i.e., the // collation value between 'c' and 'd'). @@ -7253,7 +7409,7 @@ Process_Char: if( ui16WpChar && ui16SubCol == ui16WpChar) { ui16SubCol = flmWPGetSubCol( - flmWPUpper( ui16WpChar), + f_wpUpper( ui16WpChar), ui16Col, m_uiLanguage); } } @@ -7274,14 +7430,14 @@ Process_Char: { if (!bAsian && ui16WpChar) { - // flmWPIsUpper() returns FALSE if the character is lower or + // f_wpIsUpper() returns FALSE if the character is lower or // TRUE if the character is not lower case. - if( flmWPIsUpper( ui16WpChar)) + if( f_wpIsUpper( ui16WpChar)) { if( bTwoIntoOne) { - if( flmWPIsUpper( ui16NextWpChar)) + if( f_wpIsUpper( ui16NextWpChar)) { ucCase = 0x03; } @@ -7610,13 +7766,13 @@ Desc: Called by ftkStartup, this routine initializes the Unicode to ****************************************************************************/ RCODE f_initCharMappingTables( void) { + RCODE rc = NE_FLM_OK; FLMUINT16 * puStaticPtr; FLMUINT uiLoop; FLMUINT uiEntries; FLMUINT uiOffset; - RCODE rc = NE_FLM_OK; - if( gv_pUnicodeToWP60 || gv_pWP60ToUnicode) + if( gv_pUnicodeToWP60 || gv_pWP60ToUnicode || gv_pui16USCollationTable) { rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE); goto Exit; @@ -7704,6 +7860,62 @@ RCODE f_initCharMappingTables( void) gv_pWP60ToUnicode[ uiOffset] = puStaticPtr[ 0]; } + // Allocate the US collation mapping table + + uiEntries = 0x10000; + if (RC_BAD( rc = f_calloc( uiEntries * sizeof( FLMUINT16), + &gv_pui16USCollationTable))) + { + goto Exit; + } + + // Populate the US collation mapping table + + for( uiLoop = 0; uiLoop < uiEntries; uiLoop++) + { + FLMBYTE ucCharVal = (FLMBYTE)uiLoop; + FLMBYTE ucCharSet = (FLMBYTE)(uiLoop >> 8); + TBL_B_TO_BP * pColTbl = fwp_col60Tbl; + + do + { + if( pColTbl->key == ucCharSet) + { + FLMBYTE * pucColVals = pColTbl->charPtr; + + // Check if the value is in the range of collated chars + // Above lower range of table? 
+ + if( ucCharVal >= *pucColVals) + { + // Make value zero based to index + + ucCharVal -= *pucColVals++; + + // Below maximum number of table entries? + + if( ucCharVal < *pucColVals++) + { + // Return collated value. + + gv_pui16USCollationTable[ uiLoop] = pucColVals[ ucCharVal]; + break; + } + } + } + + // Go to next table entry + + pColTbl++; + + } while( pColTbl->key != 0xFF); + + if( pColTbl->key == 0xFF) + { + gv_pui16USCollationTable[ uiLoop] = COLS0; + } + } + Exit: if( RC_BAD( rc)) @@ -7718,6 +7930,11 @@ Exit: f_free( &gv_pWP60ToUnicode); } + if( gv_pui16USCollationTable) + { + f_free( &gv_pui16USCollationTable); + } + gv_uiMinUniChar = 0; gv_uiMaxUniChar = 0; @@ -7744,6 +7961,11 @@ void f_freeCharMappingTables( void) f_free( &gv_pWP60ToUnicode); } + if( gv_pui16USCollationTable) + { + f_free( &gv_pui16USCollationTable); + } + gv_uiMinUniChar = 0; gv_uiMaxUniChar = 0; @@ -7757,7 +7979,7 @@ Out: WP characters that have been modified to their original case Ret: Number of bytes used in the lower/upper buffer Notes: Only WP to lower case conversion is done here for each bit NOT set. ***************************************************************************/ -FSTATIC FLMUINT flmWPToMixed( +FLMUINT FLMAPI f_wpToMixed( FLMBYTE * pucWPStr, // Existing WP string to modify FLMUINT uiWPStrLen, // Length of the WP string in bytes const FLMBYTE * pucLowUpBitStr, // Lower/upper case bit string @@ -7776,11 +7998,10 @@ FSTATIC FLMUINT flmWPToMixed( : (FLMBYTE)0 ; // For each character (two bytes) in the word string ... 
- for( uiNumChars = uiWPStrLen >> 1, - ucMaskByte = 0; // Force first time to get a byte - uiNumChars--; - pucWPStr += 2, // Next WP character - word - ucMaskByte >>= 1) // Next bit to mask and check + + for( uiNumChars = uiWPStrLen >> 1, ucMaskByte = 0; + uiNumChars--; + pucWPStr += 2, ucMaskByte >>= 1) { if( ucMaskByte == 0) { @@ -7808,10 +8029,10 @@ FSTATIC FLMUINT flmWPToMixed( // Check if charact within region of character set - if( ((ucCharSet == CHSMUL1) && + if( ((ucCharSet == F_CHSMUL1) && ((ucCharVal >= 26) && (ucCharVal <= 241))) || - ((ucCharSet == CHSGREK) && (ucCharVal <= 69)) || - ((ucCharSet == CHSCYR) && (ucCharVal <= 199))) + ((ucCharSet == F_CHSGREK) && (ucCharVal <= 69)) || + ((ucCharSet == F_CHSCYR) && (ucCharVal <= 199))) { uiTempWord |= 0x01; // Set the bit ... don't increment! } @@ -7824,68 +8045,6 @@ FSTATIC FLMUINT flmWPToMixed( return( bytesInBits( uiNumChars)); } -/**************************************************************************** -Desc: Take a base and a diacritic and compose a WP character. - Note on base character: i's and j's must be dotless i's and j's (for - those which use them) or they will not be found. -Ret: TRUE - if not found - FALSE - if found -Notes: ascii characters with diacriticals are in multi-national if anywhere; - all other base chars with diacritics are found in their own sets. -****************************************************************************/ -FSTATIC FLMBOOL flmWPCmbcar( - FLMUINT16 * pui16WpChar, - FLMUINT16 ui16BaseChar, - FLMINT16 ui16DiacriticChar) -{ - FLMUINT uiRemaining; - FLMBYTE ucCharSet; - FLMBYTE ucChar; - BASE_DIACRIT * pBaseDiacritic; - BASE_DIACRIT_TABLE * pTable; - - ucCharSet = HI( ui16BaseChar); - if( ucCharSet > WP_MAX_CAR60_SIZE) - { - return( TRUE); - } - - // Is base ASCII? 
If so, look in multinational 1 - - if( !ucCharSet) - { - ucCharSet = CHSMUL1; - } - - if( (pBaseDiacritic = fwp_car60_c[ucCharSet]) == 0) - { - return( TRUE); - } - - ucChar = LO( ui16BaseChar); - ui16DiacriticChar = LO( ui16DiacriticChar); - pTable = pBaseDiacritic->table; - for( uiRemaining = pBaseDiacritic->char_count; - uiRemaining; - uiRemaining--, pTable++ ) - { - // Same base? - - if( pTable->base == ucChar && - (pTable->diacrit & 0x7F) == ui16DiacriticChar) - { - // Same diacritic? - - *pui16WpChar = (FLMUINT16) (((FLMUINT16) ucCharSet << 8) + - (pBaseDiacritic->start_char + - (FLMUINT16)(pTable - pBaseDiacritic->table))); - return( FALSE); - } - } - - return( TRUE); -} - /**************************************************************************** Desc: Convert a text string to a collated string. If NE_FLM_CONV_DEST_OVERFLOW is returned the string is truncated as @@ -8041,7 +8200,7 @@ RCODE flmUTF8ToColText( if (!charIsUpper( ui16WpChr)) { - uiFlags |= HAD_LOWER_CASE; + uiFlags |= F_HAD_LOWER_CASE; } else { @@ -8056,7 +8215,7 @@ RCODE flmUTF8ToColText( // Get the collated value from the WP character-if not collating value if ((pucCollatedStr[ uiColLen++] = - (FLMBYTE)(flmWPGetCollation( ui16WpChr, uiLanguage))) >= COLS11) + (FLMBYTE)(f_wpGetCollation( ui16WpChr, uiLanguage))) >= COLS11) { FLMUINT uiTemp; @@ -8125,7 +8284,7 @@ store_extended_char: ucSubColBuf [(uiSubColBitPos + 8) >> 3] = 0; ucSubColBuf [(uiSubColBitPos + 16) >> 3] = 0; - uiFlags |= HAD_SUB_COLLATION; + uiFlags |= F_HAD_SUB_COLLATION; // Set 110 bits in sub-collation - continued from above. // No need to explicitly set the zero, but must increment @@ -8178,14 +8337,14 @@ store_extended_char: switch( ucCharSet) { - case CHSMUL1: // Multinational 1 + case F_CHSMUL1: // Multinational 1 { // If we cannot break down a char into base and // diacritic we cannot combine the charaacter // later when converting back the key. In that case, // write the entire WP char in the sub-collation area. 
- if( flmWPBrkcar( ui16WpChr, &ui16Base, &ui16SubColVal)) + if( f_breakWPChar( ui16WpChr, &ui16Base, &ui16SubColVal)) { goto store_extended_char; } @@ -8196,19 +8355,19 @@ store_extended_char: // NOTE: The "unlaut" character must sort after the "ring" // character. - ui16SubColVal = ((ui16SubColVal & 0xFF) == umlaut && + ui16SubColVal = ((ui16SubColVal & 0xFF) == F_UMLAUT && (uiLanguage == FLM_SU_LANG || uiLanguage == FLM_SV_LANG || uiLanguage == FLM_CZ_LANG || uiLanguage == FLM_SL_LANG)) - ? (FLMUINT16)(fwp_dia60Tbl[ ring] + 1) + ? (FLMUINT16)(fwp_dia60Tbl[ F_RING] + 1) : (FLMUINT16)(fwp_dia60Tbl[ ui16SubColVal & 0xFF]); store_sub_col: // Set the next byte that follows in the sub collation buffer. ucSubColBuf[ (uiSubColBitPos + 8) >> 3] = 0; - uiFlags |= HAD_SUB_COLLATION; + uiFlags |= F_HAD_SUB_COLLATION; // Set the 10 bits - no need to explicitly set the zero, but // must increment for it. @@ -8223,7 +8382,7 @@ store_sub_col: break; } - case CHSGREK: // Greek + case F_CHSGREK: // Greek { if (ucChar >= 52 || // Keep case bit for 52-69 else ignore ui16WpChr == 0x804 || // [ 8,4] BETA Medial | Terminal @@ -8239,7 +8398,7 @@ store_sub_col: break; } - case CHSCYR: + case F_CHSCYR: { if (ucChar >= 144) { @@ -8256,7 +8415,7 @@ store_sub_col: break; } - case CHSHEB: // Hebrew + case F_CHSHEB: // Hebrew { // Three sections in Hebrew: // 0..26 - main characters @@ -8280,7 +8439,7 @@ store_sub_col: break; } - case CHSARB1: // Arabic 1 + case F_CHSARB1: // Arabic 1 { // Three sections in Arabic: // 00..37 - accents that display OVER a previous character @@ -8323,7 +8482,7 @@ store_sub_col: break; } - case CHSARB2: // Arabic 2 + case F_CHSARB2: // Arabic 2 { // There are some characters that share the same slot // Check the bit table if above character 64 @@ -8382,7 +8541,7 @@ store_sub_col: ucCaseBits[ (uiCaseBitPos + 7) >> 3] = 0; if( !charIsUpper( ui16WpChr2)) { - uiFlags |= HAD_LOWER_CASE; + uiFlags |= F_HAD_LOWER_CASE; } else { @@ -8401,7 +8560,7 @@ store_sub_col: // We 
have a digraph, get second collation value pucCollatedStr[ uiColLen++] = - (FLMBYTE)(flmWPGetCollation( ui16WpChr2, uiLanguage)); + (FLMBYTE)(f_wpGetCollation( ui16WpChr2, uiLanguage)); // Normal case, assume no diacritics set @@ -8462,12 +8621,12 @@ store_sub_col: if (bFirstSubstring) { - pucCollatedStr[ uiColLen++] = COLL_FIRST_SUBSTRING; + pucCollatedStr[ uiColLen++] = F_COLL_FIRST_SUBSTRING; } if (bDataTruncated) { - pucCollatedStr[ uiColLen++ ] = COLL_TRUNCATED; + pucCollatedStr[ uiColLen++ ] = F_COLL_TRUNCATED; } // Return NOTHING if no values found @@ -8489,20 +8648,21 @@ store_sub_col: } // Done putting the string into 4 sections - build the COLLATED KEY - // Don't set uiUppercaseFlag earlier than here because SC_LOWER may be zero + // Don't set uiUppercaseFlag earlier than here because F_SC_LOWER + // may be zero uiUppercaseFlag = (uiLanguage == FLM_GR_LANG) - ? SC_LOWER - : SC_UPPER; + ? F_SC_LOWER + : F_SC_UPPER; // Did we write anything to the subcollation area? - // The default terminating characters is (COLL_MARKER|SC_UPPER) + // The default terminating characters is (F_COLL_MARKER | F_SC_UPPER) - if (uiFlags & HAD_SUB_COLLATION) + if (uiFlags & F_HAD_SUB_COLLATION) { // Writes out a 0x7 - pucCollatedStr[ uiColLen++] = COLL_MARKER | SC_SUB_COL; + pucCollatedStr[ uiColLen++] = F_COLL_MARKER | F_SC_SUB_COL; // Move the sub-collation into the collating string @@ -8514,20 +8674,20 @@ store_sub_col: // Move the upper/lower case stuff - force bits for Greek ONLY // This is such a small size that a memcpy is not worth it - if( uiFlags & HAD_LOWER_CASE) + if( uiFlags & F_HAD_LOWER_CASE) { FLMUINT uiNumBytes = bytesInBits( uiCaseBitPos); FLMBYTE * pucCasePtr = ucCaseBits; // Output the 0x5 - pucCollatedStr[ uiColLen++] = (FLMBYTE)(COLL_MARKER | SC_MIXED); + pucCollatedStr[ uiColLen++] = (FLMBYTE)(F_COLL_MARKER | F_SC_MIXED); if( puiCaseLen) { *puiCaseLen = uiNumBytes + 1; } - if( uiUppercaseFlag == SC_LOWER) + if( uiUppercaseFlag == F_SC_LOWER) { // Negate 
case bits for languages (like GREEK) that sort // upper case before lower case. @@ -8550,7 +8710,7 @@ store_sub_col: // All characters are either upper or lower case, as determined // by uiUppercaseFlag. - pucCollatedStr[ uiColLen++] = (FLMBYTE)(COLL_MARKER | uiUppercaseFlag); + pucCollatedStr[ uiColLen++] = (FLMBYTE)(F_COLL_MARKER | uiUppercaseFlag); if( puiCaseLen) { *puiCaseLen = 1; @@ -8600,7 +8760,7 @@ RCODE FLMAPI f_colStr2WPStr( if( uiLang == FLM_US_LANG) { - while( uiLength && (pucColStr[ uiPos] > MAX_COL_OPCODE)) + while( uiLength && (pucColStr[ uiPos] > F_MAX_COL_OPCODE)) { uiLength--; @@ -8639,7 +8799,7 @@ RCODE FLMAPI f_colStr2WPStr( bHebrewArabic = TRUE; } - while( uiLength && (pucColStr[ uiPos] > MAX_COL_OPCODE)) + while( uiLength && (pucColStr[ uiPos] > F_MAX_COL_OPCODE)) { uiLength--; uiColChar = (FLMUINT)pucColStr[ uiPos++]; @@ -8757,7 +8917,7 @@ RCODE FLMAPI f_colStr2WPStr( // Check first substring before truncated - if( uiLength && pucColStr[ uiPos] == COLL_FIRST_SUBSTRING) + if( uiLength && pucColStr[ uiPos] == F_COLL_FIRST_SUBSTRING) { if( pbFirstSubstring) { @@ -8769,7 +8929,7 @@ RCODE FLMAPI f_colStr2WPStr( // Is the key truncated? - if( uiLength && pucColStr[ uiPos] == COLL_TRUNCATED) + if( uiLength && pucColStr[ uiPos] == F_COLL_TRUNCATED) { if( pbDataTruncated) { @@ -8783,7 +8943,7 @@ RCODE FLMAPI f_colStr2WPStr( // Still more to process - first work on the sub-collation (diacritics) // Hebrew/Arabic may have empty collation area - if( uiLength && (pucColStr[ uiPos] == (COLL_MARKER | SC_SUB_COL))) + if( uiLength && (pucColStr[ uiPos] == (F_COLL_MARKER | F_SC_SUB_COL))) { FLMUINT uiTempLen; @@ -8809,10 +8969,10 @@ RCODE FLMAPI f_colStr2WPStr( // Take care of the lower and upper case conversion // If mixed case then convert using case bits - if( pucColStr[ uiPos++] & SC_MIXED) // Increment pos here! + if( pucColStr[ uiPos++] & F_SC_MIXED) // Increment pos here! { // Don't pre-increment pos on line below! 
- uiPos += flmWPToMixed( pucWPStr, uiWPStrLen, + uiPos += f_wpToMixed( pucWPStr, uiWPStrLen, &pucColStr[ uiPos], uiLang); } // else 0x04 or 0x06 - all characters already in uppercase @@ -8985,7 +9145,7 @@ RCODE FLMAPI f_asiaUTF8ToColText( if( ui16SubColVal) { - uiFlags |= HAD_SUB_COLLATION; + uiFlags |= F_HAD_SUB_COLLATION; if( ui16SubColVal <= 31) // 5 bit - store bits 10 { setBit( ucSubColBuf, uiSubColBitPos); @@ -9090,13 +9250,13 @@ RCODE FLMAPI f_asiaUTF8ToColText( if( bFirstSubstring) { pucColStr[ uiColLen++] = 0; - pucColStr[ uiColLen++] = COLL_FIRST_SUBSTRING; + pucColStr[ uiColLen++] = F_COLL_FIRST_SUBSTRING; } if( bDataTruncated) { pucColStr[ uiColLen++] = 0; - pucColStr[ uiColLen++] = COLL_TRUNCATED; + pucColStr[ uiColLen++] = F_COLL_TRUNCATED; } // Return NOTHING if no values found @@ -9112,10 +9272,10 @@ RCODE FLMAPI f_asiaUTF8ToColText( // Done putting the String into 3 sections - build the COLLATED KEY - if( uiFlags & HAD_SUB_COLLATION) + if( uiFlags & F_HAD_SUB_COLLATION) { pucColStr[ uiColLen++] = 0; - pucColStr[ uiColLen++] = COLL_MARKER | SC_SUB_COL; + pucColStr[ uiColLen++] = F_COLL_MARKER | F_SC_SUB_COL; // Move the Sub-collation (diacritics) into the collating string @@ -9127,7 +9287,7 @@ RCODE FLMAPI f_asiaUTF8ToColText( // Always represent the marker as 2 bytes and case bits in Asia pucColStr[ uiColLen++] = 0; - pucColStr[ uiColLen++] = COLL_MARKER | SC_MIXED; + pucColStr[ uiColLen++] = F_COLL_MARKER | F_SC_MIXED; uiLength = (FLMUINT)(bytesInBits( uiLowUpBitPos)); f_memcpy( &pucColStr[ uiColLen ], ucLowUpBuf, uiLength); @@ -9158,7 +9318,7 @@ Notes: For each bit in the sub-collation section: 110 - align to next byte & take word value as extended character ****************************************************************************/ -FSTATIC RCODE flmAsiaParseSubCol( +RCODE FLMAPI f_asiaParseSubCol( FLMBYTE * pucWPStr, FLMUINT * puiWPStrLen, FLMUINT uiMaxWPBytes, @@ -9208,7 +9368,7 @@ FSTATIC RCODE flmAsiaParseSubCol( { // Convert to WP 
diacritic and combine characters - flmWPCmbcar( &ui16WpChar, ui16WpChar, + f_combineWPChar( &ui16WpChar, ui16WpChar, (FLMUINT16)ml1_COLtoD[ ui16Diac]); // Even if cmbcar fails, WpChar is still set to a valid value @@ -9350,9 +9510,7 @@ FSTATIC RCODE flmAsiaParseCase( // For each character (two bytes) in the string ... - for( uiCharCnt = uiWPStrLen >> 1, // Total number of words in word string - ucMaskByte = 0; // Force first time to get a byte - uiCharCnt--;) + for( uiCharCnt = uiWPStrLen >> 1, ucMaskByte = 0; uiCharCnt--;) { FLMBYTE ucChar; FLMBYTE ucCharSet; @@ -9405,7 +9563,7 @@ FSTATIC RCODE flmAsiaParseCase( } else { - flmWPHanToZenkaku( ui16WpChar, 0, &ui16WpChar); + f_wpHanToZenkaku( ui16WpChar, 0, &ui16WpChar); } } else if( ucCharSet == 8) // Greek @@ -9428,7 +9586,7 @@ FSTATIC RCODE flmAsiaParseCase( } else { - flmWPHanToZenkaku( ui16WpChar, 0, &ui16WpChar); + f_wpHanToZenkaku( ui16WpChar, 0, &ui16WpChar); } ucCharSet = (FLMBYTE)(ui16WpChar >> 8); @@ -9446,12 +9604,15 @@ FSTATIC RCODE flmAsiaParseCase( switch( ucCharSet) { case 0: + { // Bit zero only if lower case ui16WpChar |= 0x20; break; + } case 1: + { // In upper/lower case region? 
if( ucChar >= 26) @@ -9459,8 +9620,10 @@ FSTATIC RCODE flmAsiaParseCase( ui16WpChar++; } break; + } case 8: + { // All lowercase after 69 if( ucChar <= 69) @@ -9468,8 +9631,10 @@ FSTATIC RCODE flmAsiaParseCase( ui16WpChar++; } break; + } case 10: + { // No cases after 199 if( ucChar <= 199) @@ -9477,21 +9642,26 @@ FSTATIC RCODE flmAsiaParseCase( ui16WpChar++; } break; + } case 0x25: case 0x26: + { // Should be double wide latin or Greek // Add offset to convert to lowercase ui16WpChar += 0x20; break; + } case 0x27: + { // Double wide cyrillic only // Add offset to convert to lowercase ui16WpChar += 0x30; break; + } } } } @@ -9503,7 +9673,7 @@ FSTATIC RCODE flmAsiaParseCase( { FLMUINT16 ui16NextChar = 0; - ui16WpChar = flmWPZenToHankaku( ui16WpChar, &ui16NextChar); + ui16WpChar = f_wpZenToHankaku( ui16WpChar, &ui16NextChar); if( ui16NextChar) // Move everyone down { if( (*puiWPStrLen) + 2 > uiMaxWPBytes) @@ -9524,7 +9694,7 @@ FSTATIC RCODE flmAsiaParseCase( } else if( ucCharSet == 0x24) { - ui16WpChar = flmWPZenToHankaku( ui16WpChar, NULL); + ui16WpChar = f_wpZenToHankaku( ui16WpChar, NULL); } ucMaskByte >>= 1; // Eat the next bit } @@ -9587,7 +9757,7 @@ RCODE FLMAPI f_asiaColStr2WPStr( FLMBYTE ucCharSet = pucColStr[ uiColStrPos]; ui16ColChar = (FLMUINT16)((ucCharSet << 8) + ucChar); - if( ui16ColChar <= MAX_COL_OPCODE) + if( ui16ColChar <= F_MAX_COL_OPCODE) { break; } @@ -9687,7 +9857,8 @@ RCODE FLMAPI f_asiaColStr2WPStr( pucColStr[ uiColStrPos + 1]); // First substring is before truncated. 
- if( ui16ColChar == COLL_FIRST_SUBSTRING) + + if( ui16ColChar == F_COLL_FIRST_SUBSTRING) { if( pbFirstSubstring) { @@ -9700,7 +9871,7 @@ RCODE FLMAPI f_asiaColStr2WPStr( pucColStr[ uiColStrPos + 1]); } - if( ui16ColChar == COLL_TRUNCATED) + if( ui16ColChar == F_COLL_TRUNCATED) { if( pbDataTruncated) { @@ -9712,7 +9883,7 @@ RCODE FLMAPI f_asiaColStr2WPStr( pucColStr[ uiColStrPos+1]); } - if( ui16ColChar == (COLL_MARKER | SC_SUB_COL)) + if( ui16ColChar == (F_COLL_MARKER | F_SC_SUB_COL)) { FLMUINT uiTempLen; @@ -9720,7 +9891,7 @@ RCODE FLMAPI f_asiaColStr2WPStr( uiColStrPos += 2; uiLength -= 2; - if( RC_BAD( rc = flmAsiaParseSubCol( pucWPStr, &uiWPStrLen, + if( RC_BAD( rc = f_asiaParseSubCol( pucWPStr, &uiWPStrLen, uiMaxWPBytes, &pucColStr[ uiColStrPos], &uiTempLen))) { goto Exit; @@ -9743,7 +9914,7 @@ RCODE FLMAPI f_asiaColStr2WPStr( pucColStr[ uiColStrPos + 1]); check_case: - if( ui16ColChar == (COLL_MARKER | SC_MIXED)) + if( ui16ColChar == (F_COLL_MARKER | F_SC_MIXED)) { uiColStrPos += 2; @@ -9842,7 +10013,7 @@ after_last_character: { // Convert to WP diacritic and combine characters - flmWPCmbcar( &ui16WPChar, ui16WPChar, + f_combineWPChar( &ui16WPChar, ui16WPChar, (FLMUINT16)ml1_COLtoD[ ui16Diac]); // Even if cmbcar fails, wpchar is still set to a valid value @@ -9921,6 +10092,7 @@ after_last_character: uiNumChars = 0; // Set so we won't loop forever! 
goto after_last_character; // process trailing bit } + uiSubColBitPos++; // Eat the last '0' bit } diff --git a/ftk/util/ftktest.cpp b/ftk/util/ftktest.cpp index 4021640..696feb8 100644 --- a/ftk/util/ftktest.cpp +++ b/ftk/util/ftktest.cpp @@ -24,22 +24,41 @@ #include "ftk.h" -#define F_ATOM_TEST_THREADS 64 -#define F_ATOM_TEST_ITERATIONS 100000 +#define F_ATOM_TEST_THREADS 64 +#define F_ATOM_TEST_ITERATIONS 100000 +#define UTOWP60_ENTRIES 1502 + +#define Upper_JP_A 0x2520 +#define Upper_JP_Z 0x2539 +#define Upper_KR_A 0x5420 +#define Upper_KR_Z 0x5439 +#define Upper_CS_A 0x82FC +#define Upper_CS_Z 0x8316 +#define Upper_CT_A 0xA625 +#define Upper_CT_Z 0xA63E + +#define Lower_JP_a 0x2540 +#define Lower_JP_z 0x2559 +#define Lower_KR_a 0x5440 +#define Lower_KR_z 0x5459 +#define Lower_CS_a 0x82DC +#define Lower_CS_z 0x82F5 +#define Lower_CT_a 0xA60B +#define Lower_CT_z 0xA624 FSTATIC RCODE ftkTestAtomics( void); FSTATIC RCODE FLMAPI ftkAtomicIncThread( - IF_Thread * pThread); + IF_Thread * pThread); FSTATIC RCODE FLMAPI ftkAtomicDecThread( - IF_Thread * pThread); + IF_Thread * pThread); FSTATIC RCODE FLMAPI ftkAtomicIncDecThread( - IF_Thread * pThread); + IF_Thread * pThread); FSTATIC RCODE FLMAPI ftkAtomicExchangeThread( - IF_Thread * pThread); + IF_Thread * pThread); FSTATIC RCODE ftkFastChecksumTest( void); @@ -50,9 +69,4178 @@ FSTATIC FLMBYTE ftkSlowPacketChecksum( const FLMBYTE * pucPacket, FLMUINT uiBytesToChecksum); -FSTATIC FLMATOMIC gv_refCount; -FSTATIC FLMATOMIC gv_spinLock; +FSTATIC RCODE ftkTestText( void); + +FSTATIC FLMBOOL flmCh6Brkcar( + FLMUINT16 ui16WpChar, + FLMUINT16 * pui16BaseChar, + FLMUINT16 * pui16DiacriticChar); +FSTATIC FLMBOOL flmCh6Cmbcar( + FLMUINT16 * pui16WpChar, + FLMUINT16 ui16BaseChar, + FLMINT16 ui16DiacriticChar); + +FSTATIC FLMUINT16 flmCh6Upper( + FLMUINT16 ui16WpChar); + +FSTATIC FLMBOOL flmIsUpper( + FLMUINT16 ui16WpChar); + +FSTATIC FLMUINT16 flmCh6Lower( + FLMUINT16 ui16WpChar); + +FSTATIC FLMUINT16 ZenToHankaku( + FLMUINT16 
ui16WpChar, + FLMUINT16 * DakutenOrHandakutenRV); + +FSTATIC FLMUINT16 HanToZenkaku( + FLMUINT16 ui16WpChar, + FLMUINT16 ui16NextWpChar, + FLMUINT16 * pui16Zenkaku); + +FSTATIC FLMUINT16 flmGetCollation( + FLMUINT16 ui16WpChar, + FLMUINT uiLanguage); + +FSTATIC FLMATOMIC gv_refCount; +FSTATIC FLMATOMIC gv_spinLock; + +/**************************************************************************** +Desc: +****************************************************************************/ +#define grave 0 +#define centerd 1 +#define tilde 2 +#define circum 3 +#define crossb 4 +#define slash 5 +#define acute 6 +#define umlaut 7 +#define macron 8 +#define aposab 9 +#define aposbes 10 +#define aposba 11 +#define ring 14 +#define dota 15 +#define dacute 16 +#define cedilla 17 +#define ogonek 18 +#define caron 19 +#define stroke 20 +#define breve 22 +#define dotlesi 239 +#define dotlesj 25 +#define gacute 83 // greek acute +#define gdia 84 // greek diaeresis +#define gactdia 85 // acute diaeresis +#define ggrvdia 86 // grave diaeresis +#define ggrave 87 // greek grave +#define gcircm 88 // greek circumflex +#define gsmooth 89 // smooth breathing +#define grough 90 // rough breathing +#define giota 91 // iota subscript +#define gsmact 92 // smooth breathing acute +#define grgact 93 // rough breathing acute +#define gsmgrv 94 // smooth breathing grave +#define grggrv 95 // rough breathing grave +#define gsmcir 96 // smooth breathing circumflex +#define grgcir 97 // rough breathing circumflex +#define gactio 98 // acute iota +#define ggrvio 99 // grave iota +#define gcirio 100 // circumflex iota +#define gsmio 101 // smooth iota +#define grgio 102 // rough iota +#define gsmaio 103 // smooth acute iota +#define grgaio 104 // rough acute iota +#define gsmgvio 105 // smooth grave iota +#define grggvio 106 // rough grave iota +#define gsmcio 107 // smooth circumflex iota +#define grgcio 108 // rough circumflex iota +#define ghprime 81 // high prime +#define glprime 82 // low prime 
+#define racute 200 // russian acute +#define rgrave 201 // russian grave +#define rrtdesc 204 // russian right descender +#define rogonek 205 // russian ogonek +#define rmacron 206 // russian macron + +#define NCHSETS 15 // # of character sets (excluding asian) + +#define START_COL 12 +#define START_ALL (START_COL + 1) // all US and European +#define START_DK (START_COL + 2) // Danish +#define START_IS (START_COL + 3) // Icelandic +#define START_NO (START_COL + 4) // Norwegian +#define START_SU (START_COL + 5) // Finnish +#define START_SV (START_COL + 5) // Swedish +#define START_YK (START_COL + 6) // Ukrainian +#define START_TK (START_COL + 7) // Turkish +#define START_CZ (START_COL + 8) // Czech +#define START_SL (START_COL + 8) // Slovak + +#define FIXUP_AREA_SIZE 24 + +#define STATE1 1 +#define STATE2 2 +#define STATE3 3 +#define STATE4 4 +#define STATE5 5 +#define STATE6 6 +#define STATE7 7 +#define STATE8 8 +#define STATE9 9 +#define STATE10 10 +#define STATE11 11 +#define AFTERC 12 +#define AFTERH 13 +#define AFTERL 14 +#define INSTAE 15 +#define INSTOE 16 +#define INSTSG 17 +#define INSTIJ 18 +#define WITHAA 19 + +/**************************************************************************** +Desc: Defines the range of characters within the set which are case + convertible +****************************************************************************/ +FSTATIC FLMBYTE flmCaseConvertableRange[] = +{ + 26, 241, // Multinational 1 + 0, 0, // Multinational 2 + 0, 0, // Box Drawing + 0, 0, // Symbol 1 + 0, 0, // Symbol 2 + 0, 0, // Math 1 + 0, 0, // Math 2 + 0, 69, // Greek 1 + 0, 0, // Hebrew + 0, 199, // Cyrillic + 0, 0, // Japanese Kana + 0, 0, // User-defined + 0, 0, // Not defined + 0, 0, // Not defined + 0, 0, // Not defined +}; + +/**************************************************************************** +Desc: Maps a 16-bit character code (presumably Unicode, judging by the table name) to a WP character: high byte = character set, low byte = character within the set (see the per-row comments) +****************************************************************************/ +FSTATIC FLMUINT16 WP_UTOWP60[ UTOWP60_ENTRIES][ 2] = +{ + { 0x00A1, 
0x0407 }, // 7 , 4, + { 0x00A2, 0x0413 }, // 19 , 4, + { 0x00A3, 0x040b }, // 11 , 4, + { 0x00A4, 0x0418 }, // 24 , 4, + { 0x00A5, 0x040c }, // 12 , 4, + { 0x00A7, 0x0406 }, // 6 , 4, + { 0x00A9, 0x0417 }, // 23 , 4, + { 0x00AA, 0x040f }, // 15 , 4, + { 0x00AB, 0x0409 }, // 9 , 4, + { 0x00AC, 0x0614 }, // 20 , 6, + { 0x00AE, 0x0416 }, // 22 , 4, + { 0x00B0, 0x0624 }, // 36 , 6, + { 0x00B1, 0x0601 }, // 1 , 6, + { 0x00B2, 0x0414 }, // 20 , 4, + { 0x00B3, 0x041a }, // 26 , 4, + { 0x00B5, 0x0625 }, // 37 , 6, + { 0x00B6, 0x0405 }, // 5 , 4, + { 0x00B7, 0x0101 }, // 1 , 1, + { 0x00B9, 0x044e }, // 78 , 4, + { 0x00BA, 0x0410 }, // 16 , 4, + { 0x00BB, 0x040a }, // 10 , 4, + { 0x00BC, 0x0412 }, // 18 , 4, + { 0x00BD, 0x0411 }, // 17 , 4, + { 0x00BE, 0x0419 }, // 25 , 4, + { 0x00BF, 0x0408 }, // 8 , 4, + { 0x00C0, 0x0120 }, // 32 , 1, + { 0x00C1, 0x011a }, // 26 , 1, + { 0x00C2, 0x011c }, // 28 , 1, + { 0x00C3, 0x014c }, // 76 , 1, + { 0x00C4, 0x011e }, // 30 , 1, + { 0x00C5, 0x0122 }, // 34 , 1, + { 0x00C6, 0x0124 }, // 36 , 1, + { 0x00C7, 0x0126 }, // 38 , 1, + { 0x00C8, 0x012e }, // 46 , 1, + { 0x00C9, 0x0128 }, // 40 , 1, + { 0x00CA, 0x012a }, // 42 , 1, + { 0x00CB, 0x012c }, // 44 , 1, + { 0x00CC, 0x0136 }, // 54 , 1, + { 0x00CD, 0x0130 }, // 48 , 1, + { 0x00CE, 0x0132 }, // 50 , 1, + { 0x00CF, 0x0134 }, // 52 , 1, + { 0x00D0, 0x0156 }, // 86 , 1, + { 0x00D1, 0x0138 }, // 56 , 1, + { 0x00D2, 0x0140 }, // 64 , 1, + { 0x00D3, 0x013a }, // 58 , 1, + { 0x00D4, 0x013c }, // 60 , 1, + { 0x00D5, 0x0152 }, // 82 , 1, + { 0x00D6, 0x013e }, // 62 , 1, + { 0x00D7, 0x0627 }, // 39 , 6, + { 0x00D8, 0x0150 }, // 80 , 1, + { 0x00D9, 0x0148 }, // 72 , 1, + { 0x00DA, 0x0142 }, // 66 , 1, + { 0x00DB, 0x0144 }, // 68 , 1, + { 0x00DC, 0x0146 }, // 70 , 1, + { 0x00DD, 0x0154 }, // 84 , 1, + { 0x00DE, 0x0158 }, // 88 , 1, + { 0x00DF, 0x0117 }, // 23 , 1, + { 0x00E0, 0x0121 }, // 33 , 1, + { 0x00E1, 0x011b }, // 27 , 1, + { 0x00E2, 0x011d }, // 29 , 1, + { 0x00E3, 0x014d }, // 77 , 1, + { 
0x00E4, 0x011f }, // 31 , 1, + { 0x00E5, 0x0123 }, // 35 , 1, + { 0x00E6, 0x0125 }, // 37 , 1, + { 0x00E7, 0x0127 }, // 39 , 1, + { 0x00E8, 0x012f }, // 47 , 1, + { 0x00E9, 0x0129 }, // 41 , 1, + { 0x00EA, 0x012b }, // 43 , 1, + { 0x00EB, 0x012d }, // 45 , 1, + { 0x00EC, 0x0137 }, // 55 , 1, + { 0x00ED, 0x0131 }, // 49 , 1, + { 0x00EE, 0x0133 }, // 51 , 1, + { 0x00EF, 0x0135 }, // 53 , 1, + { 0x00F0, 0x0157 }, // 87 , 1, + { 0x00F1, 0x0139 }, // 57 , 1, + { 0x00F2, 0x0141 }, // 65 , 1, + { 0x00F3, 0x013b }, // 59 , 1, + { 0x00F4, 0x013d }, // 61 , 1, + { 0x00F5, 0x0153 }, // 83 , 1, + { 0x00F6, 0x013f }, // 63 , 1, + { 0x00F7, 0x0608 }, // 8 , 6, + { 0x00F8, 0x0151 }, // 81 , 1, + { 0x00F9, 0x0149 }, // 73 , 1, + { 0x00FA, 0x0143 }, // 67 , 1, + { 0x00FB, 0x0145 }, // 69 , 1, + { 0x00FC, 0x0147 }, // 71 , 1, + { 0x00FD, 0x0155 }, // 85 , 1, + { 0x00FE, 0x0159 }, // 89 , 1, + { 0x00FF, 0x014b }, // 75 , 1, + { 0x0100, 0x015c }, // 92 , 1, + { 0x0101, 0x015d }, // 93 , 1, + { 0x0102, 0x015a }, // 90 , 1, + { 0x0103, 0x015b }, // 91 , 1, + { 0x0104, 0x015e }, // 94 , 1, + { 0x0105, 0x015f }, // 95 , 1, + { 0x0106, 0x0160 }, // 96 , 1, + { 0x0107, 0x0161 }, // 97 , 1, + { 0x0108, 0x0164 }, // 100, 1, + { 0x0109, 0x0165 }, // 101, 1, + { 0x010A, 0x0166 }, // 102, 1, + { 0x010B, 0x0167 }, // 103, 1, + { 0x010C, 0x0162 }, // 98 , 1, + { 0x010D, 0x0163 }, // 99 , 1, + { 0x010E, 0x0168 }, // 104, 1, + { 0x010F, 0x0169 }, // 105, 1, + { 0x0110, 0x014e }, // 78 , 1, + { 0x0111, 0x014f }, // 79 , 1, + { 0x0112, 0x016e }, // 110, 1, + { 0x0113, 0x016f }, // 111, 1, + { 0x0114, 0x01ea }, // 234, 1, + { 0x0115, 0x01eb }, // 235, 1, + { 0x0116, 0x016c }, // 108, 1, + { 0x0117, 0x016d }, // 109, 1, + { 0x0118, 0x0170 }, // 112, 1, + { 0x0119, 0x0171 }, // 113, 1, + { 0x011A, 0x016a }, // 106, 1, + { 0x011B, 0x016b }, // 107, 1, + { 0x011C, 0x017a }, // 122, 1, + { 0x011D, 0x017b }, // 123, 1, + { 0x011E, 0x0174 }, // 116, 1, + { 0x011F, 0x0175 }, // 117, 1, + { 0x0120, 0x017c }, // 
124, 1, + { 0x0121, 0x017d }, // 125, 1, + { 0x0122, 0x0178 }, // 120, 1, + { 0x0123, 0x0179 }, // 121, 1, + { 0x0124, 0x017e }, // 126, 1, + { 0x0125, 0x017f }, // 127, 1, + { 0x0126, 0x0180 }, // 128, 1, + { 0x0127, 0x0181 }, // 129, 1, + { 0x0128, 0x0188 }, // 136, 1, + { 0x0129, 0x0189 }, // 137, 1, + { 0x012A, 0x0184 }, // 132, 1, + { 0x012B, 0x0185 }, // 133, 1, + { 0x012C, 0x01ec }, // 236, 1, + { 0x012D, 0x01ed }, // 237, 1, + { 0x012E, 0x0186 }, // 134, 1, + { 0x012F, 0x0187 }, // 135, 1, + { 0x0130, 0x0182 }, // 130, 1, + { 0x0131, 0x01ef }, // 239, 1, + { 0x0132, 0x018a }, // 138, 1, + { 0x0133, 0x018b }, // 139, 1, + { 0x0134, 0x018c }, // 140, 1, + { 0x0135, 0x018d }, // 141, 1, + { 0x0136, 0x018e }, // 142, 1, + { 0x0137, 0x018f }, // 143, 1, + { 0x0138, 0x0118 }, // 24 , 1, + { 0x0139, 0x0190 }, // 144, 1, + { 0x013A, 0x0191 }, // 145, 1, + { 0x013B, 0x0194 }, // 148, 1, + { 0x013C, 0x0195 }, // 149, 1, + { 0x013D, 0x0192 }, // 146, 1, + { 0x013E, 0x0193 }, // 147, 1, + { 0x013F, 0x0196 }, // 150, 1, + { 0x0140, 0x0197 }, // 151, 1, + { 0x0141, 0x0198 }, // 152, 1, + { 0x0142, 0x0199 }, // 153, 1, + { 0x0143, 0x019a }, // 154, 1, + { 0x0144, 0x019b }, // 155, 1, + { 0x0145, 0x01a0 }, // 160, 1, + { 0x0146, 0x01a1 }, // 161, 1, + { 0x0147, 0x019e }, // 158, 1, + { 0x0148, 0x019f }, // 159, 1, + { 0x0149, 0x019d }, // 157, 1, + { 0x014A, 0x01d2 }, // 210, 1, + { 0x014B, 0x01d3 }, // 211, 1, + { 0x014C, 0x01a4 }, // 164, 1, + { 0x014D, 0x01a5 }, // 165, 1, + { 0x014E, 0x01f0 }, // 240, 1, + { 0x014F, 0x01f1 }, // 241, 1, + { 0x0150, 0x01a2 }, // 162, 1, + { 0x0151, 0x01a3 }, // 163, 1, + { 0x0152, 0x01a6 }, // 166, 1, + { 0x0153, 0x01a7 }, // 167, 1, + { 0x0154, 0x01a8 }, // 168, 1, + { 0x0155, 0x01a9 }, // 169, 1, + { 0x0156, 0x01ac }, // 172, 1, + { 0x0157, 0x01ad }, // 173, 1, + { 0x0158, 0x01aa }, // 170, 1, + { 0x0159, 0x01ab }, // 171, 1, + { 0x015A, 0x01ae }, // 174, 1, + { 0x015B, 0x01af }, // 175, 1, + { 0x015C, 0x01b4 }, // 180, 1, + { 0x015D, 
0x01b5 }, // 181, 1, + { 0x015E, 0x01b2 }, // 178, 1, + { 0x015F, 0x01b3 }, // 179, 1, + { 0x0160, 0x01b0 }, // 176, 1, + { 0x0161, 0x01b1 }, // 177, 1, + { 0x0162, 0x01b8 }, // 184, 1, + { 0x0163, 0x01b9 }, // 185, 1, + { 0x0164, 0x01b6 }, // 182, 1, + { 0x0165, 0x01b7 }, // 183, 1, + { 0x0166, 0x01ba }, // 186, 1, + { 0x0167, 0x01bb }, // 187, 1, + { 0x0168, 0x01c6 }, // 198, 1, + { 0x0169, 0x01c7 }, // 199, 1, + { 0x016A, 0x01c0 }, // 192, 1, + { 0x016B, 0x01c1 }, // 193, 1, + { 0x016C, 0x01bc }, // 188, 1, + { 0x016D, 0x01bd }, // 189, 1, + { 0x016E, 0x01c4 }, // 196, 1, + { 0x016F, 0x01c5 }, // 197, 1, + { 0x0170, 0x01be }, // 190, 1, + { 0x0171, 0x01bf }, // 191, 1, + { 0x0172, 0x01c2 }, // 194, 1, + { 0x0173, 0x01c3 }, // 195, 1, + { 0x0174, 0x01c8 }, // 200, 1, + { 0x0175, 0x01c9 }, // 201, 1, + { 0x0176, 0x01ca }, // 202, 1, + { 0x0177, 0x01cb }, // 203, 1, + { 0x0178, 0x014a }, // 74 , 1, + { 0x0179, 0x01cc }, // 204, 1, + { 0x017A, 0x01cd }, // 205, 1, + { 0x017B, 0x01d0 }, // 208, 1, + { 0x017C, 0x01d1 }, // 209, 1, + { 0x017D, 0x01ce }, // 206, 1, + { 0x017E, 0x01cf }, // 207, 1, + { 0x0192, 0x040e }, // 14 , 4, + { 0x0194, 0x0a7c }, // 124, 10, + { 0x01A0, 0x01e6 }, // 230, 1, + { 0x01A1, 0x01e7 }, // 231, 1, + { 0x01AF, 0x01e8 }, // 232, 1, + { 0x01B0, 0x01e9 }, // 233, 1, + { 0x01C0, 0x0605 }, // 5 , 6, + { 0x0250, 0x0237 }, // 55 , 2, + { 0x0251, 0x0238 }, // 56 , 2, + { 0x0252, 0x0239 }, // 57 , 2, + { 0x0253, 0x023a }, // 58 , 2, + { 0x0254, 0x023c }, // 60 , 2, + { 0x0255, 0x023d }, // 61 , 2, + { 0x0256, 0x023f }, // 63 , 2, + { 0x0257, 0x0240 }, // 64 , 2, + { 0x0258, 0x0241 }, // 65 , 2, + { 0x0259, 0x0242 }, // 66 , 2, + { 0x025A, 0x0243 }, // 67 , 2, + { 0x025B, 0x0244 }, // 68 , 2, + { 0x025C, 0x0245 }, // 69 , 2, + { 0x025D, 0x0246 }, // 70 , 2, + { 0x025E, 0x0248 }, // 72 , 2, + { 0x025F, 0x0249 }, // 73 , 2, + { 0x0260, 0x024c }, // 76 , 2, + { 0x0261, 0x024b }, // 75 , 2, + { 0x0262, 0x024d }, // 77 , 2, + { 0x0263, 0x024f }, // 79 , 
2, + { 0x0264, 0x0250 }, // 80 , 2, + { 0x0265, 0x0251 }, // 81 , 2, + { 0x0266, 0x0252 }, // 82 , 2, + { 0x0267, 0x0253 }, // 83 , 2, + { 0x0268, 0x0255 }, // 85 , 2, + { 0x0269, 0x0257 }, // 87 , 2, + { 0x026A, 0x0256 }, // 86 , 2, + { 0x026B, 0x025a }, // 90 , 2, + { 0x026C, 0x025b }, // 91 , 2, + { 0x026D, 0x025c }, // 92 , 2, + { 0x026E, 0x025e }, // 94 , 2, + { 0x026F, 0x0260 }, // 96 , 2, + { 0x0270, 0x0261 }, // 97 , 2, + { 0x0271, 0x0262 }, // 98 , 2, + { 0x0272, 0x0263 }, // 99 , 2, + { 0x0273, 0x0264 }, // 100, 2, + { 0x0274, 0x0265 }, // 101, 2, + { 0x0275, 0x0279 }, // 121, 2, + { 0x0276, 0x0266 }, // 102, 2, + { 0x0277, 0x0267 }, // 103, 2, + { 0x0278, 0x024a }, // 74 , 2, + { 0x0279, 0x0269 }, // 105, 2, + { 0x027A, 0x026a }, // 106, 2, + { 0x027B, 0x026b }, // 107, 2, + { 0x027C, 0x026c }, // 108, 2, + { 0x027D, 0x026d }, // 109, 2, + { 0x027E, 0x026e }, // 110, 2, + { 0x027F, 0x026f }, // 111, 2, + { 0x0280, 0x0270 }, // 112, 2, + { 0x0281, 0x0271 }, // 113, 2, + { 0x0282, 0x0272 }, // 114, 2, + { 0x0283, 0x0273 }, // 115, 2, + { 0x0284, 0x0274 }, // 116, 2, + { 0x0285, 0x0275 }, // 117, 2, + { 0x0286, 0x0276 }, // 118, 2, + { 0x0287, 0x0277 }, // 119, 2, + { 0x0288, 0x0278 }, // 120, 2, + { 0x0289, 0x027a }, // 122, 2, + { 0x028A, 0x027b }, // 123, 2, + { 0x028B, 0x027d }, // 125, 2, + { 0x028C, 0x027c }, // 124, 2, + { 0x028D, 0x027e }, // 126, 2, + { 0x028E, 0x025f }, // 95 , 2, + { 0x028F, 0x0280 }, // 128, 2, + { 0x0290, 0x0281 }, // 129, 2, + { 0x0291, 0x0282 }, // 130, 2, + { 0x0292, 0x0283 }, // 131, 2, + { 0x0293, 0x0284 }, // 132, 2, + { 0x0294, 0x0285 }, // 133, 2, + { 0x0295, 0x0286 }, // 134, 2, + { 0x0296, 0x0287 }, // 135, 2, + { 0x0297, 0x023e }, // 62 , 2, + { 0x0298, 0x028a }, // 138, 2, + { 0x0299, 0x023b }, // 59 , 2, + { 0x029A, 0x0247 }, // 71 , 2, + { 0x029B, 0x024e }, // 78 , 2, + { 0x029C, 0x0254 }, // 84 , 2, + { 0x029D, 0x0258 }, // 88 , 2, + { 0x029E, 0x0259 }, // 89 , 2, + { 0x029F, 0x025d }, // 93 , 2, + { 0x02A0, 
0x0268 }, // 104, 2, + { 0x02A1, 0x0288 }, // 136, 2, + { 0x02A2, 0x0289 }, // 137, 2, + { 0x02A3, 0x028b }, // 139, 2, + { 0x02A4, 0x028c }, // 140, 2, + { 0x02A5, 0x028d }, // 141, 2, + { 0x02A6, 0x028e }, // 142, 2, + { 0x02A7, 0x028f }, // 143, 2, + { 0x02A8, 0x0290 }, // 144, 2, + { 0x02B0, 0x0235 }, // 53 , 2, + { 0x02B6, 0x0236 }, // 54 , 2, + { 0x02B9, 0x0200 }, // 0 , 2, + { 0x02BA, 0x0201 }, // 1 , 2, + { 0x02BB, 0x0202 }, // 2 , 2, + { 0x02BC, 0x0205 }, // 5 , 2, + { 0x02BD, 0x0204 }, // 4 , 2, + { 0x02BE, 0x0207 }, // 7 , 2, + { 0x02BF, 0x0208 }, // 8 , 2, + { 0x02C6, 0x0217 }, // 23 , 2, + { 0x02C7, 0x0218 }, // 24 , 2, + { 0x02C8, 0x020f }, // 15 , 2, + { 0x02C9, 0x0211 }, // 17 , 2, + { 0x02CA, 0x0212 }, // 18 , 2, + { 0x02CB, 0x0213 }, // 19 , 2, + { 0x02CC, 0x0210 }, // 16 , 2, + { 0x02CD, 0x0214 }, // 20 , 2, + { 0x02CE, 0x0215 }, // 21 , 2, + { 0x02CF, 0x0216 }, // 22 , 2, + { 0x02D0, 0x020a }, // 10 , 2, + { 0x02D1, 0x020b }, // 11 , 2, + { 0x02D2, 0x022a }, // 42 , 2, + { 0x02D3, 0x022b }, // 43 , 2, + { 0x02DA, 0x021b }, // 27 , 2, + { 0x02DB, 0x0231 }, // 49 , 2, + { 0x02DC, 0x0219 }, // 25 , 2, + { 0x02DE, 0x0233 }, // 51 , 2, + { 0x0300, 0x0100 }, // 0 , 1, + { 0x0301, 0x0106 }, // 6 , 1, + { 0x0302, 0x0103 }, // 3 , 1, + { 0x0303, 0x0102 }, // 2 , 1, + { 0x0304, 0x0108 }, // 8 , 1, + { 0x0305, 0x0115 }, // 21 , 1, + { 0x0306, 0x0116 }, // 22 , 1, + { 0x0307, 0x010f }, // 15 , 1, + { 0x0308, 0x0107 }, // 7 , 1, + { 0x030A, 0x010e }, // 14 , 1, + { 0x030B, 0x0110 }, // 16 , 1, + { 0x030C, 0x0113 }, // 19 , 1, + { 0x0310, 0x0209 }, // 9 , 2, + { 0x0311, 0x0858 }, // 88 , 8, + { 0x0313, 0x0109 }, // 9 , 1, + { 0x0314, 0x085a }, // 90 , 8, + { 0x0315, 0x010a }, // 10 , 1, + { 0x031C, 0x0221 }, // 33 , 2, + { 0x031D, 0x0222 }, // 34 , 2, + { 0x031E, 0x0223 }, // 35 , 2, + { 0x031F, 0x0224 }, // 36 , 2, + { 0x0320, 0x0225 }, // 37 , 2, + { 0x0321, 0x0226 }, // 38 , 2, + { 0x0322, 0x0227 }, // 39 , 2, + { 0x0323, 0x021e }, // 30 , 2, + { 0x0324, 
0x0220 }, // 32 , 2, + { 0x0325, 0x021a }, // 26 , 2, + { 0x0326, 0x010c }, // 12 , 1, + { 0x0327, 0x0111 }, // 17 , 1, + { 0x0328, 0x0112 }, // 18 , 1, + { 0x0329, 0x020e }, // 14 , 2, + { 0x032A, 0x0228 }, // 40 , 2, + { 0x032B, 0x0229 }, // 41 , 2, + { 0x032C, 0x021d }, // 29 , 2, + { 0x032D, 0x021c }, // 28 , 2, + { 0x032E, 0x020d }, // 13 , 2, + { 0x0335, 0x0104 }, // 4 , 1, + { 0x0337, 0x0114 }, // 20 , 1, + { 0x0338, 0x0105 }, // 5 , 1, + { 0x033E, 0x0230 }, // 48 , 2, + { 0x0345, 0x085b }, // 91 , 8, + { 0x0374, 0x0851 }, // 81 , 8, + { 0x0375, 0x0852 }, // 82 , 8, + { 0x0391, 0x0800 }, // 0 , 8, + { 0x0392, 0x0802 }, // 2 , 8, + { 0x0393, 0x0806 }, // 6 , 8, + { 0x0394, 0x0808 }, // 8 , 8, + { 0x0395, 0x080a }, // 10 , 8, + { 0x0396, 0x080c }, // 12 , 8, + { 0x0397, 0x080e }, // 14 , 8, + { 0x0398, 0x0810 }, // 16 , 8, + { 0x0399, 0x0812 }, // 18 , 8, + { 0x039A, 0x0814 }, // 20 , 8, + { 0x039B, 0x0816 }, // 22 , 8, + { 0x039C, 0x0818 }, // 24 , 8, + { 0x039D, 0x081a }, // 26 , 8, + { 0x039E, 0x081c }, // 28 , 8, + { 0x039F, 0x081e }, // 30 , 8, + { 0x03A0, 0x0820 }, // 32 , 8, + { 0x03A1, 0x0822 }, // 34 , 8, + { 0x03A3, 0x0824 }, // 36 , 8, + { 0x03A4, 0x0828 }, // 40 , 8, + { 0x03A5, 0x082a }, // 42 , 8, + { 0x03A6, 0x082c }, // 44 , 8, + { 0x03A7, 0x082e }, // 46 , 8, + { 0x03A8, 0x0830 }, // 48 , 8, + { 0x03A9, 0x0832 }, // 50 , 8, + { 0x03AA, 0x083c }, // 60 , 8, + { 0x03AB, 0x0842 }, // 66 , 8, + { 0x03AC, 0x0835 }, // 53 , 8, + { 0x03AD, 0x0837 }, // 55 , 8, + { 0x03AE, 0x0839 }, // 57 , 8, + { 0x03AF, 0x083b }, // 59 , 8, + { 0x03B1, 0x0801 }, // 1 , 8, + { 0x03B2, 0x0803 }, // 3 , 8, + { 0x03B3, 0x0807 }, // 7 , 8, + { 0x03B4, 0x0809 }, // 9 , 8, + { 0x03B5, 0x080b }, // 11 , 8, + { 0x03B6, 0x080d }, // 13 , 8, + { 0x03B7, 0x080f }, // 15 , 8, + { 0x03B8, 0x0811 }, // 17 , 8, + { 0x03B9, 0x0813 }, // 19 , 8, + { 0x03BA, 0x0815 }, // 21 , 8, + { 0x03BB, 0x0817 }, // 23 , 8, + { 0x03BC, 0x0819 }, // 25 , 8, + { 0x03BD, 0x081b }, // 27 , 8, + { 
0x03BE, 0x081d }, // 29 , 8, + { 0x03BF, 0x081f }, // 31 , 8, + { 0x03C0, 0x0821 }, // 33 , 8, + { 0x03C1, 0x0823 }, // 35 , 8, + { 0x03C2, 0x0827 }, // 39 , 8, + { 0x03C3, 0x0825 }, // 37 , 8, + { 0x03C4, 0x0829 }, // 41 , 8, + { 0x03C5, 0x082b }, // 43 , 8, + { 0x03C6, 0x082d }, // 45 , 8, + { 0x03C7, 0x082f }, // 47 , 8, + { 0x03C8, 0x0831 }, // 49 , 8, + { 0x03C9, 0x0833 }, // 51 , 8, + { 0x03CA, 0x083d }, // 61 , 8, + { 0x03CB, 0x0843 }, // 67 , 8, + { 0x03CC, 0x083f }, // 63 , 8, + { 0x03CD, 0x0841 }, // 65 , 8, + { 0x03CE, 0x0845 }, // 69 , 8, + { 0x03D0, 0x0805 }, // 5 , 8, + { 0x03D1, 0x0847 }, // 71 , 8, + { 0x03D2, 0x084c }, // 76 , 8, + { 0x03D5, 0x084d }, // 77 , 8, + { 0x03D6, 0x0849 }, // 73 , 8, + { 0x03D7, 0x084f }, // 79 , 8, + { 0x03DA, 0x08d7 }, // 215, 8, + { 0x03DB, 0x084B }, // 75 , 8, + { 0x03DC, 0x08d8 }, // 216, 8, + { 0x03DE, 0x08d9 }, // 217, 8, + { 0x03E0, 0x08da }, // 218, 8, + { 0x03F0, 0x0848 }, // 72 , 8, + { 0x03F1, 0x084a }, // 74 , 8, + { 0x0401, 0x0a0c }, // 12 , 10, + { 0x0402, 0x0a4a }, // 74 , 10, + { 0x0403, 0x0a44 }, // 68 , 10, + { 0x0404, 0x0a4e }, // 78 , 10, + { 0x0405, 0x0a52 }, // 82 , 10, + { 0x0406, 0x0a58 }, // 88 , 10, + { 0x0407, 0x0a5a }, // 90 , 10, + { 0x0408, 0x0a5e }, // 94 , 10, + { 0x0409, 0x0a68 }, // 104, 10, + { 0x040A, 0x0a6c }, // 108, 10, + { 0x040B, 0x0a72 }, // 114, 10, + { 0x040C, 0x0a60 }, // 96 , 10, + { 0x040E, 0x0a74 }, // 116, 10, + { 0x040F, 0x0a86 }, // 134, 10, + { 0x0410, 0x0a00 }, // 0 , 10, + { 0x0411, 0x0a02 }, // 2 , 10, + { 0x0412, 0x0a04 }, // 4 , 10, + { 0x0413, 0x0a06 }, // 6 , 10, + { 0x0414, 0x0a08 }, // 8 , 10, + { 0x0415, 0x0a0a }, // 10 , 10, + { 0x0416, 0x0a0e }, // 14 , 10, + { 0x0417, 0x0a10 }, // 16 , 10, + { 0x0418, 0x0a12 }, // 18 , 10, + { 0x0419, 0x0a14 }, // 20 , 10, + { 0x041A, 0x0a16 }, // 22 , 10, + { 0x041B, 0x0a18 }, // 24 , 10, + { 0x041C, 0x0a1a }, // 26 , 10, + { 0x041D, 0x0a1c }, // 28 , 10, + { 0x041E, 0x0a1e }, // 30 , 10, + { 0x041F, 0x0a20 }, // 32 , 10, 
+ { 0x0420, 0x0a22 }, // 34 , 10, + { 0x0421, 0x0a24 }, // 36 , 10, + { 0x0422, 0x0a26 }, // 38 , 10, + { 0x0423, 0x0a28 }, // 40 , 10, + { 0x0424, 0x0a2a }, // 42 , 10, + { 0x0425, 0x0a2c }, // 44 , 10, + { 0x0426, 0x0a2e }, // 46 , 10, + { 0x0427, 0x0a30 }, // 48 , 10, + { 0x0428, 0x0a32 }, // 50 , 10, + { 0x0429, 0x0a34 }, // 52 , 10, + { 0x042A, 0x0a36 }, // 54 , 10, + { 0x042B, 0x0a38 }, // 56 , 10, + { 0x042C, 0x0a3a }, // 58 , 10, + { 0x042D, 0x0a3c }, // 60 , 10, + { 0x042E, 0x0a3e }, // 62 , 10, + { 0x042F, 0x0a40 }, // 64 , 10, + { 0x0430, 0x0a01 }, // 1 , 10, + { 0x0431, 0x0a03 }, // 3 , 10, + { 0x0432, 0x0a05 }, // 5 , 10, + { 0x0433, 0x0a07 }, // 7 , 10, + { 0x0434, 0x0a09 }, // 9 , 10, + { 0x0435, 0x0a0b }, // 11 , 10, + { 0x0436, 0x0a0f }, // 15 , 10, + { 0x0437, 0x0a11 }, // 17 , 10, + { 0x0438, 0x0a13 }, // 19 , 10, + { 0x0439, 0x0a15 }, // 21 , 10, + { 0x043A, 0x0a17 }, // 23 , 10, + { 0x043B, 0x0a19 }, // 25 , 10, + { 0x043C, 0x0a1b }, // 27 , 10, + { 0x043D, 0x0a1d }, // 29 , 10, + { 0x043E, 0x0a1f }, // 31 , 10, + { 0x043F, 0x0a21 }, // 33 , 10, + { 0x0440, 0x0a23 }, // 35 , 10, + { 0x0441, 0x0a25 }, // 37 , 10, + { 0x0442, 0x0a27 }, // 39 , 10, + { 0x0443, 0x0a29 }, // 41 , 10, + { 0x0444, 0x0a2b }, // 43 , 10, + { 0x0445, 0x0a2d }, // 45 , 10, + { 0x0446, 0x0a2f }, // 47 , 10, + { 0x0447, 0x0a31 }, // 49 , 10, + { 0x0448, 0x0a33 }, // 51 , 10, + { 0x0449, 0x0a35 }, // 53 , 10, + { 0x044A, 0x0a37 }, // 55 , 10, + { 0x044B, 0x0a39 }, // 57 , 10, + { 0x044C, 0x0a3b }, // 59 , 10, + { 0x044D, 0x0a3d }, // 61 , 10, + { 0x044E, 0x0a3f }, // 63 , 10, + { 0x044F, 0x0a41 }, // 65 , 10, + { 0x0451, 0x0a0d }, // 13 , 10, + { 0x0452, 0x0a4b }, // 75 , 10, + { 0x0453, 0x0a45 }, // 69 , 10, + { 0x0454, 0x0a4f }, // 79 , 10, + { 0x0455, 0x0a53 }, // 83 , 10, + { 0x0456, 0x0a59 }, // 89 , 10, + { 0x0457, 0x0a5b }, // 91 , 10, + { 0x0458, 0x0a5f }, // 95 , 10, + { 0x0459, 0x0a69 }, // 105, 10, + { 0x045A, 0x0a6d }, // 109, 10, + { 0x045B, 0x0a73 }, // 115, 
10, + { 0x045C, 0x0a61 }, // 97 , 10, + { 0x045E, 0x0a75 }, // 117, 10, + { 0x045F, 0x0a87 }, // 135, 10, + { 0x0460, 0x0a70 }, // 112, 10, + { 0x0461, 0x0a71 }, // 113, 10, + { 0x0462, 0x0a8e }, // 142, 10, + { 0x0463, 0x0a8f }, // 143, 10, + { 0x0466, 0x0a90 }, // 144, 10, + { 0x0467, 0x0a91 }, // 145, 10, + { 0x046A, 0x0a92 }, // 146, 10, + { 0x046B, 0x0a93 }, // 147, 10, + { 0x046E, 0x0a94 }, // 148, 10, + { 0x046F, 0x0a95 }, // 149, 10, + { 0x0470, 0x0a96 }, // 150, 10, + { 0x0471, 0x0a97 }, // 151, 10, + { 0x0472, 0x0a98 }, // 152, 10, + { 0x0473, 0x0a99 }, // 153, 10, + { 0x0474, 0x0a9a }, // 154, 10, + { 0x0475, 0x0a9b }, // 155, 10, + { 0x047A, 0x0a6e }, // 110, 10, + { 0x047B, 0x0a6f }, // 111, 10, + { 0x047E, 0x0a84 }, // 132, 10, + { 0x047F, 0x0a85 }, // 133, 10, + { 0x0490, 0x0a46 }, // 70 , 10, + { 0x0491, 0x0a47 }, // 71 , 10, + { 0x0492, 0x0a48 }, // 72 , 10, + { 0x0493, 0x0a49 }, // 73 , 10, + { 0x0496, 0x0a50 }, // 80 , 10, + { 0x0497, 0x0a51 }, // 81 , 10, + { 0x049A, 0x0a62 }, // 98 , 10, + { 0x049B, 0x0a63 }, // 99 , 10, + { 0x049C, 0x0a66 }, // 102, 10, + { 0x049D, 0x0a67 }, // 103, 10, + { 0x04A2, 0x0a6a }, // 106, 10, + { 0x04A3, 0x0a6b }, // 107, 10, + { 0x04AE, 0x0a78 }, // 120, 10, + { 0x04AF, 0x0a79 }, // 121, 10, + { 0x04B0, 0x0a7a }, // 122, 10, + { 0x04B1, 0x0a7b }, // 123, 10, + { 0x04B2, 0x0a7e }, // 126, 10, + { 0x04B3, 0x0a7f }, // 127, 10, + { 0x04B6, 0x0a88 }, // 136, 10, + { 0x04B7, 0x0a89 }, // 137, 10, + { 0x04B8, 0x0a8a }, // 138, 10, + { 0x04B9, 0x0a8b }, // 139, 10, + { 0x04BA, 0x0a82 }, // 130, 10, + { 0x04BB, 0x0a83 }, // 131, 10, + { 0x04D8, 0x0a42 }, // 66 , 10, + { 0x04D9, 0x0a43 }, // 67 , 10, + { 0x04EE, 0x0a76 }, // 118, 10, + { 0x04EF, 0x0a77 }, // 119, 10, + { 0x05B0, 0x0920 }, // 32 , 9, + { 0x05B1, 0x0921 }, // 33 , 9, + { 0x05B2, 0x0922 }, // 34 , 9, + { 0x05B3, 0x0923 }, // 35 , 9, + { 0x05B4, 0x0924 }, // 36 , 9, + { 0x05B5, 0x0925 }, // 37 , 9, + { 0x05B6, 0x0926 }, // 38 , 9, + { 0x05B7, 0x0927 }, // 39 , 
9, + { 0x05B8, 0x0928 }, // 40 , 9, + { 0x05B9, 0x0929 }, // 41 , 9, + { 0x05BB, 0x092b }, // 43 , 9, + { 0x05BC, 0x092c }, // 44 , 9, + { 0x05BD, 0x092d }, // 45 , 9, + { 0x05BF, 0x092e }, // 46 , 9, + { 0x05C0, 0x091c }, // 28 , 9, + { 0x05C3, 0x091d }, // 29 , 9, + { 0x05D0, 0x0900 }, // 0 , 9, + { 0x05D1, 0x0901 }, // 1 , 9, + { 0x05D2, 0x0902 }, // 2 , 9, + { 0x05D3, 0x0903 }, // 3 , 9, + { 0x05D4, 0x0904 }, // 4 , 9, + { 0x05D5, 0x0905 }, // 5 , 9, + { 0x05D6, 0x0906 }, // 6 , 9, + { 0x05D7, 0x0907 }, // 7 , 9, + { 0x05D8, 0x0908 }, // 8 , 9, + { 0x05D9, 0x0909 }, // 9 , 9, + { 0x05DA, 0x090a }, // 10 , 9, + { 0x05DB, 0x090b }, // 11 , 9, + { 0x05DC, 0x090c }, // 12 , 9, + { 0x05DD, 0x090d }, // 13 , 9, + { 0x05DE, 0x090e }, // 14 , 9, + { 0x05DF, 0x090f }, // 15 , 9, + { 0x05E0, 0x0910 }, // 16 , 9, + { 0x05E1, 0x0911 }, // 17 , 9, + { 0x05E2, 0x0912 }, // 18 , 9, + { 0x05E3, 0x0913 }, // 19 , 9, + { 0x05E4, 0x0914 }, // 20 , 9, + { 0x05E5, 0x0915 }, // 21 , 9, + { 0x05E6, 0x0916 }, // 22 , 9, + { 0x05E7, 0x0917 }, // 23 , 9, + { 0x05E8, 0x0918 }, // 24 , 9, + { 0x05E9, 0x0919 }, // 25 , 9, + { 0x05EA, 0x091a }, // 26 , 9, + { 0x05F0, 0x0931 }, // 49 , 9, + { 0x05F1, 0x0932 }, // 50 , 9, + { 0x05F2, 0x0933 }, // 51 , 9, + { 0x05F3, 0x091e }, // 30 , 9, + { 0x05F4, 0x091f }, // 31 , 9, + { 0x060C, 0x0d26 }, // 38 , 13, + { 0x061B, 0x0d27 }, // 39 , 13, + { 0x061F, 0x0d28 }, // 40 , 13, + { 0x0621, 0x0da4 }, // 164, 13, + { 0x0622, 0x0db1 }, // 177, 13, + { 0x0623, 0x0da5 }, // 165, 13, + { 0x0624, 0x0da9 }, // 169, 13, + { 0x0625, 0x0da7 }, // 167, 13, + { 0x0626, 0x0dab }, // 171, 13, + { 0x0627, 0x0d3a }, // 58 , 13, + { 0x0628, 0x0d3c }, // 60 , 13, + { 0x0629, 0x0d98 }, // 152, 13, + { 0x062A, 0x0d40 }, // 64 , 13, + { 0x062B, 0x0d44 }, // 68 , 13, + { 0x062C, 0x0d48 }, // 72 , 13, + { 0x062D, 0x0d4c }, // 76 , 13, + { 0x062E, 0x0d50 }, // 80 , 13, + { 0x062F, 0x0d54 }, // 84 , 13, + { 0x0630, 0x0d56 }, // 86 , 13, + { 0x0631, 0x0d58 }, // 88 , 13, + { 
0x0632, 0x0d5a }, // 90 , 13, + { 0x0633, 0x0d5c }, // 92 , 13, + { 0x0634, 0x0d60 }, // 96 , 13, + { 0x0635, 0x0d64 }, // 100, 13, + { 0x0636, 0x0d68 }, // 104, 13, + { 0x0637, 0x0d6c }, // 108, 13, + { 0x0638, 0x0d70 }, // 112, 13, + { 0x0639, 0x0d74 }, // 116, 13, + { 0x063A, 0x0d78 }, // 120, 13, + { 0x0640, 0x0dc2 }, // 194, 13, + { 0x0641, 0x0d7c }, // 124, 13, + { 0x0642, 0x0d80 }, // 128, 13, + { 0x0643, 0x0d84 }, // 132, 13, + { 0x0644, 0x0d88 }, // 136, 13, + { 0x0645, 0x0d8c }, // 140, 13, + { 0x0646, 0x0d90 }, // 144, 13, + { 0x0647, 0x0d94 }, // 148, 13, + { 0x0648, 0x0d9a }, // 154, 13, + { 0x0649, 0x0da0 }, // 160, 13, + { 0x064A, 0x0d9c }, // 156, 13, + { 0x064B, 0x0d10 }, // 16 , 13, + { 0x064C, 0x0d11 }, // 17 , 13, + { 0x064E, 0x0d0a }, // 10 , 13, + { 0x064F, 0x0d0c }, // 12 , 13, + { 0x0650, 0x0d0e }, // 14 , 13, + { 0x0651, 0x0d16 }, // 22 , 13, + { 0x0652, 0x0d14 }, // 20 , 13, + { 0x0660, 0x0d38 }, // 56 , 13, + { 0x0661, 0x0d2f }, // 47 , 13, + { 0x0662, 0x0d30 }, // 48 , 13, + { 0x0663, 0x0d31 }, // 49 , 13, + { 0x0664, 0x0d32 }, // 50 , 13, + { 0x0665, 0x0d33 }, // 51 , 13, + { 0x0666, 0x0d34 }, // 52 , 13, + { 0x0667, 0x0d35 }, // 53 , 13, + { 0x0668, 0x0d36 }, // 54 , 13, + { 0x0669, 0x0d37 }, // 55 , 13, + { 0x066A, 0x0d2a }, // 42 , 13, + { 0x0671, 0x0db3 }, // 179, 13, + { 0x0674, 0x0d24 }, // 36 , 13, + { 0x0679, 0x0e3c }, // 60 , 14, + { 0x067A, 0x0e4c }, // 76 , 14, + { 0x067B, 0x0e30 }, // 48 , 14, + { 0x067C, 0x0e40 }, // 64 , 14, + { 0x067D, 0x0e48 }, // 72 , 14, + { 0x067E, 0x0e38 }, // 56 , 14, + { 0x067F, 0x0e44 }, // 68 , 14, + { 0x0680, 0x0e34 }, // 52 , 14, + { 0x0681, 0x0e64 }, // 100, 14, + { 0x0683, 0x0e54 }, // 84 , 14, + { 0x0684, 0x0e50 }, // 80 , 14, + { 0x0685, 0x0e60 }, // 96 , 14, + { 0x0686, 0x0e58 }, // 88 , 14, + { 0x0687, 0x0e5c }, // 92 , 14, + { 0x0688, 0x0e68 }, // 104, 14, + { 0x0689, 0x0e6a }, // 106, 14, + { 0x068A, 0x0e70 }, // 112, 14, + { 0x068C, 0x0e6c }, // 108, 14, + { 0x068D, 0x0e72 }, // 114, 
14, + { 0x068E, 0x0e6e }, // 110, 14, + { 0x0691, 0x0e76 }, // 118, 14, + { 0x0692, 0x0e7C }, // 124, 14, + { 0x0693, 0x0e74 }, // 116, 14, + { 0x0695, 0x0e7a }, // 122, 14, + { 0x0696, 0x0e80 }, // 128, 14, + { 0x0698, 0x0e7e }, // 126, 14, + { 0x0699, 0x0e78 }, // 120, 14, + { 0x069A, 0x0e84 }, // 132, 14, + { 0x06A0, 0x0e88 }, // 136, 14, + { 0x06A4, 0x0e8c }, // 140, 14, + { 0x06A6, 0x0e90 }, // 144, 14, + { 0x06A9, 0x0e94 }, // 148, 14, + { 0x06AA, 0x0e9c }, // 156, 14, + { 0x06AB, 0x0ea8 }, // 168, 14, + { 0x06AF, 0x0ea0 }, // 160, 14, + { 0x06B1, 0x0eac }, // 172, 14, + { 0x06B3, 0x0eb0 }, // 176, 14, + { 0x06B5, 0x0eb4 }, // 180, 14, + { 0x06BA, 0x0eba }, // 186, 14, + { 0x06BB, 0x0ec2 }, // 194, 14, + { 0x06BC, 0x0ebe }, // 190, 14, + { 0x06C0, 0x0eda }, // 218, 14, + { 0x06C6, 0x0ec6 }, // 198, 14, + { 0x06CA, 0x0ec8 }, // 200, 14, + { 0x06CE, 0x0ed0 }, // 208, 14, + { 0x06D1, 0x0ed6 }, // 214, 14, + { 0x06D2, 0x0ed4 }, // 212, 14, + { 0x06D6, 0x0d25 }, // 37 , 13, + { 0x06E4, 0x0d22 }, // 34 , 13, + { 0x06F4, 0x0e29 }, // 41 , 14, + { 0x06F5, 0x0e2b }, // 43 , 14, + { 0x06F6, 0x0e2c }, // 44 , 14, + { 0x06F7, 0x0e2e }, // 46 , 14, + { 0x06F8, 0x0e2f }, // 47 , 14, + { 0x10D0, 0x0ad2 }, // 210, 10, + { 0x10D1, 0x0ad3 }, // 211, 10, + { 0x10D2, 0x0ad4 }, // 212, 10, + { 0x10D3, 0x0ad5 }, // 213, 10, + { 0x10D4, 0x0ad6 }, // 214, 10, + { 0x10D5, 0x0ad7 }, // 215, 10, + { 0x10D6, 0x0ad8 }, // 216, 10, + { 0x10D7, 0x0ada }, // 218, 10, + { 0x10D8, 0x0adb }, // 219, 10, + { 0x10D9, 0x0adc }, // 220, 10, + { 0x10DA, 0x0add }, // 221, 10, + { 0x10DB, 0x0ade }, // 222, 10, + { 0x10DC, 0x0adf }, // 223, 10, + { 0x10DD, 0x0ae1 }, // 225, 10, + { 0x10DE, 0x0ae2 }, // 226, 10, + { 0x10DF, 0x0ae3 }, // 227, 10, + { 0x10E0, 0x0ae4 }, // 228, 10, + { 0x10E1, 0x0ae5 }, // 229, 10, + { 0x10E2, 0x0ae6 }, // 230, 10, + { 0x10E3, 0x0ae7 }, // 231, 10, + { 0x10E4, 0x0ae9 }, // 233, 10, + { 0x10E5, 0x0aea }, // 234, 10, + { 0x10E6, 0x0aeb }, // 235, 10, + { 0x10E7, 0x0aec }, 
// 236, 10, + { 0x10E8, 0x0aed }, // 237, 10, + { 0x10E9, 0x0aee }, // 238, 10, + { 0x10EA, 0x0aef }, // 239, 10, + { 0x10EB, 0x0af0 }, // 240, 10, + { 0x10EC, 0x0af1 }, // 241, 10, + { 0x10ED, 0x0af2 }, // 242, 10, + { 0x10EE, 0x0af3 }, // 243, 10, + { 0x10EF, 0x0af5 }, // 245, 10, + { 0x10F0, 0x0af6 }, // 246, 10, + { 0x10F1, 0x0ad9 }, // 217, 10, + { 0x10F2, 0x0ae0 }, // 224, 10, + { 0x10F3, 0x0ae8 }, // 232, 10, + { 0x10F4, 0x0af4 }, // 244, 10, + { 0x10F5, 0x0af7 }, // 247, 10, + { 0x10F6, 0x0af8 }, // 248, 10, + { 0x1F00, 0x0873 }, // 115, 8, + { 0x1F01, 0x087b }, // 123, 8, + { 0x1F02, 0x0875 }, // 117, 8, + { 0x1F03, 0x087d }, // 125, 8, + { 0x1F04, 0x0874 }, // 116, 8, + { 0x1F05, 0x087c }, // 124, 8, + { 0x1F10, 0x0884 }, // 132, 8, + { 0x1F11, 0x0887 }, // 135, 8, + { 0x1F12, 0x0886 }, // 134, 8, + { 0x1F13, 0x0889 }, // 137, 8, + { 0x1F14, 0x0885 }, // 133, 8, + { 0x1F15, 0x0888 }, // 136, 8, + { 0x1F20, 0x0890 }, // 144, 8, + { 0x1F21, 0x0898 }, // 152, 8, + { 0x1F22, 0x0892 }, // 146, 8, + { 0x1F23, 0x089a }, // 154, 8, + { 0x1F24, 0x0891 }, // 145, 8, + { 0x1F25, 0x0899 }, // 153, 8, + { 0x1F30, 0x08a4 }, // 164, 8, + { 0x1F31, 0x08a8 }, // 168, 8, + { 0x1F32, 0x08a6 }, // 166, 8, + { 0x1F33, 0x08aa }, // 170, 8, + { 0x1F34, 0x08a5 }, // 165, 8, + { 0x1F35, 0x08a9 }, // 169, 8, + { 0x1F40, 0x08ad }, // 173, 8, + { 0x1F41, 0x08b0 }, // 176, 8, + { 0x1F42, 0x08af }, // 175, 8, + { 0x1F43, 0x08b2 }, // 178, 8, + { 0x1F44, 0x08ae }, // 174, 8, + { 0x1F45, 0x08b1 }, // 177, 8, + { 0x1F50, 0x08b9 }, // 185, 8, + { 0x1F51, 0x08bd }, // 189, 8, + { 0x1F52, 0x08bb }, // 187, 8, + { 0x1F53, 0x08bf }, // 191, 8, + { 0x1F54, 0x08ba }, // 186, 8, + { 0x1F55, 0x08be }, // 190, 8, + { 0x1F60, 0x08c7 }, // 199, 8, + { 0x1F61, 0x08cf }, // 207, 8, + { 0x1F62, 0x08c9 }, // 201, 8, + { 0x1F63, 0x08d1 }, // 209, 8, + { 0x1F64, 0x08c8 }, // 200, 8, + { 0x1F65, 0x08d0 }, // 208, 8, + { 0x1F70, 0x086d }, // 109, 8, + { 0x1F72, 0x0883 }, // 131, 8, + { 0x1F74, 0x088a }, // 
138, 8, + { 0x1F76, 0x08a0 }, // 160, 8, + { 0x1F78, 0x08ac }, // 172, 8, + { 0x1F7A, 0x08b5 }, // 181, 8, + { 0x1F7C, 0x08c1 }, // 193, 8, + { 0x1F80, 0x0877 }, // 119, 8, + { 0x1F81, 0x087f }, // 127, 8, + { 0x1F82, 0x0879 }, // 121, 8, + { 0x1F83, 0x0881 }, // 129, 8, + { 0x1F84, 0x0878 }, // 120, 8, + { 0x1F85, 0x0880 }, // 128, 8, + { 0x1F90, 0x0894 }, // 148, 8, + { 0x1F91, 0x089c }, // 156, 8, + { 0x1F92, 0x0896 }, // 150, 8, + { 0x1F93, 0x089e }, // 158, 8, + { 0x1F94, 0x0895 }, // 149, 8, + { 0x1F95, 0x089d }, // 157, 8, + { 0x1FA0, 0x08cb }, // 203, 8, + { 0x1FA1, 0x08d3 }, // 211, 8, + { 0x1FA2, 0x08cd }, // 205, 8, + { 0x1FA3, 0x08d5 }, // 213, 8, + { 0x1FA4, 0x08cc }, // 204, 8, + { 0x1FA5, 0x08d4 }, // 212, 8, + { 0x1FB2, 0x0871 }, // 113, 8, + { 0x1FB3, 0x086f }, // 111, 8, + { 0x1FB4, 0x0870 }, // 112, 8, + { 0x1FC2, 0x088e }, // 142, 8, + { 0x1FC3, 0x088c }, // 140, 8, + { 0x1FC4, 0x088d }, // 141, 8, + { 0x1FCD, 0x085e }, // 94 , 8, + { 0x1FCE, 0x085c }, // 92 , 8, + { 0x1FDD, 0x085f }, // 95 , 8, + { 0x1FDE, 0x085d }, // 93 , 8, + { 0x1FE4, 0x08B4 }, // 180, 8, + { 0x1FE5, 0x08B3 }, // 179, 8, + { 0x1FF2, 0x08c5 }, // 197, 8, + { 0x1FF3, 0x08c3 }, // 195, 8, + { 0x1FF4, 0x08c4 }, // 196, 8, + { 0x2007, 0x0517 }, // 23 , 5, + { 0x2012, 0x0432 }, // 50 , 4, + { 0x2013, 0x0421 }, // 33 , 4, + { 0x2014, 0x0422 }, // 34 , 4, + { 0x2017, 0x022f }, // 47 , 2, + { 0x2018, 0x041d }, // 29 , 4, + { 0x2019, 0x041c }, // 28 , 4, + { 0x201A, 0x043e }, // 62 , 4, + { 0x201B, 0x041b }, // 27 , 4, + { 0x201C, 0x0420 }, // 32 , 4, + { 0x201D, 0x041f }, // 31 , 4, + { 0x201E, 0x043f }, // 63 , 4, + { 0x201F, 0x041e }, // 30 , 4, + { 0x2020, 0x0427 }, // 39 , 4, + { 0x2021, 0x0428 }, // 40 , 4, + { 0x2022, 0x0403 }, // 3 , 4, + { 0x2026, 0x0438 }, // 56 , 4, + { 0x2030, 0x044b }, // 75 , 4, + { 0x2033, 0x0580 }, // 128, 5, + { 0x2034, 0x0671 }, // 113, 6, + { 0x2036, 0x057f }, // 127, 5, + { 0x2039, 0x0423 }, // 35 , 4, + { 0x203A, 0x0424 }, // 36 , 4, + { 0x203C, 
0x050d }, // 13 , 5, + { 0x203E, 0x0626 }, // 38 , 6, + { 0x207F, 0x0415 }, // 21 , 4, + { 0x20A0, 0x043c }, // 60 , 4, + { 0x20A2, 0x043b }, // 59 , 4, + { 0x20A3, 0x043a }, // 58 , 4, + { 0x20A4, 0x043d }, // 61 , 4, + { 0x20A6, 0x0457 }, // 87 , 4, + { 0x20A7, 0x040d }, // 13 , 4, + { 0x20A8, 0x0458 }, // 88 , 4, + { 0x20A9, 0x0456 }, // 86 , 4, + { 0x20AA, 0x097A }, // 122, 9, + { 0x20AC, 0x0466 }, // 102, 4, + { 0x20DD, 0x066d }, // 109, 6, + { 0x20E1, 0x06e1 }, // 225, 6, + { 0x2102, 0x06d5 }, // 213, 6, + { 0x2104, 0x0515 }, // 21 , 5, + { 0x2105, 0x0449 }, // 73 , 4, + { 0x2106, 0x044a }, // 74 , 4, + { 0x210C, 0x06e9 }, // 233, 6, + { 0x210F, 0x0632 }, // 50 , 6, + { 0x2111, 0x0633 }, // 51 , 6, + { 0x2112, 0x0669 }, // 105, 6, + { 0x2113, 0x0631 }, // 49 , 6, + { 0x2115, 0x06d7 }, // 215, 6, + { 0x2116, 0x044c }, // 76 , 4, + { 0x2118, 0x0635 }, // 53 , 6, + { 0x211C, 0x0634 }, // 52 , 6, + { 0x211D, 0x06d8 }, // 216, 6, + { 0x211E, 0x042b }, // 43 , 4, + { 0x2120, 0x042a }, // 42 , 4, + { 0x2122, 0x0429 }, // 41 , 4, + { 0x2127, 0x06a7 }, // 167, 6, + { 0x2128, 0x066b }, // 107, 6, + { 0x212B, 0x0623 }, // 35 , 6, + { 0x212D, 0x066a }, // 106, 6, + { 0x212F, 0x0630 }, // 48 , 6, + { 0x2130, 0x06d3 }, // 211, 6, + { 0x2131, 0x06d4 }, // 212, 6, + { 0x2153, 0x0440 }, // 64 , 4, + { 0x2154, 0x0441 }, // 65 , 4, + { 0x215B, 0x0442 }, // 66 , 4, + { 0x215C, 0x0443 }, // 67 , 4, + { 0x215D, 0x0444 }, // 68 , 4, + { 0x215E, 0x0445 }, // 69 , 4, + { 0x2190, 0x0590 }, // 144, 5, + { 0x2191, 0x0617 }, // 23 , 6, + { 0x2192, 0x05d5 }, // 213, 5, + { 0x2193, 0x0618 }, // 24 , 6, + { 0x2194, 0x05d6 }, // 214, 5, + { 0x2195, 0x05d7 }, // 215, 5, + { 0x2196, 0x0640 }, // 64 , 6, + { 0x2197, 0x063e }, // 62 , 6, + { 0x2198, 0x063f }, // 63 , 6, + { 0x2199, 0x0641 }, // 65 , 6, + { 0x219D, 0x0690 }, // 144, 6, + { 0x21A3, 0x0693 }, // 147, 6, + { 0x21A8, 0x050f }, // 15 , 5, + { 0x21A9, 0x0691 }, // 145, 6, + { 0x21AA, 0x0692 }, // 146, 6, + { 0x21B5, 0x0514 }, // 20 , 
5, + { 0x21BC, 0x0694 }, // 148, 6, + { 0x21BD, 0x0695 }, // 149, 6, + { 0x21BE, 0x069b }, // 155, 6, + { 0x21BF, 0x069a }, // 154, 6, + { 0x21C0, 0x0696 }, // 150, 6, + { 0x21C1, 0x0697 }, // 151, 6, + { 0x21C2, 0x069d }, // 157, 6, + { 0x21C3, 0x069c }, // 156, 6, + { 0x21C4, 0x0636 }, // 54 , 6, + { 0x21C6, 0x0637 }, // 55 , 6, + { 0x21C7, 0x069f }, // 159, 6, + { 0x21C9, 0x069e }, // 158, 6, + { 0x21CB, 0x0699 }, // 153, 6, + { 0x21CC, 0x0698 }, // 152, 6, + { 0x21D0, 0x0639 }, // 57 , 6, + { 0x21D1, 0x063a }, // 58 , 6, + { 0x21D2, 0x0638 }, // 56 , 6, + { 0x21D3, 0x063b }, // 59 , 6, + { 0x21D4, 0x063c }, // 60 , 6, + { 0x21D5, 0x063d }, // 61 , 6, + { 0x21E6, 0x0597 }, // 151, 5, + { 0x21E8, 0x0596 }, // 150, 5, + { 0x2200, 0x067a }, // 122, 6, + { 0x2202, 0x062c }, // 44 , 6, + { 0x2203, 0x0679 }, // 121, 6, + { 0x2204, 0x06d0 }, // 208, 6, + { 0x2205, 0x0648 }, // 72 , 6, + { 0x2207, 0x062b }, // 43 , 6, + { 0x2208, 0x060f }, // 15 , 6, + { 0x2209, 0x06d1 }, // 209, 6, + { 0x220B, 0x06db }, // 219, 6, + { 0x220D, 0x0647 }, // 71 , 6, + { 0x220F, 0x0629 }, // 41 , 6, + { 0x2210, 0x0672 }, // 114, 6, + { 0x2211, 0x0612 }, // 18 , 6, + { 0x2212, 0x0600 }, // 0 , 6, + { 0x2213, 0x062a }, // 42 , 6, + { 0x2214, 0x06ae }, // 174, 6, + { 0x2215, 0x0606 }, // 6 , 6, + { 0x2216, 0x0607 }, // 7 , 6, + { 0x2218, 0x0621 }, // 33 , 6, + { 0x2219, 0x0622 }, // 34 , 6, + { 0x221A, 0x0704 }, // 4 , 7, + { 0x221D, 0x0604 }, // 4 , 6, + { 0x221E, 0x0613 }, // 19 , 6, + { 0x221F, 0x06da }, // 218, 6, + { 0x2220, 0x064f }, // 79 , 6, + { 0x2221, 0x06a8 }, // 168, 6, + { 0x2222, 0x06a9 }, // 169, 6, + { 0x2223, 0x0609 }, // 9 , 6, + { 0x2224, 0x06ce }, // 206, 6, + { 0x2225, 0x0611 }, // 17 , 6, + { 0x2226, 0x06cd }, // 205, 6, + { 0x2227, 0x0655 }, // 85 , 6, + { 0x2228, 0x0656 }, // 86 , 6, + { 0x2229, 0x0610 }, // 16 , 6, + { 0x222A, 0x0642 }, // 66 , 6, + { 0x222B, 0x0628 }, // 40 , 6, + { 0x222E, 0x0668 }, // 104, 6, + { 0x2234, 0x0666 }, // 102, 6, + { 0x2235, 0x0665 }, 
// 101, 6, + { 0x2237, 0x0667 }, // 103, 6, + { 0x223C, 0x060c }, // 12 , 6, + { 0x2241, 0x06bd }, // 189, 6, + { 0x2243, 0x0673 }, // 115, 6, + { 0x2244, 0x06be }, // 190, 6, + { 0x2245, 0x0674 }, // 116, 6, + { 0x2247, 0x06bf }, // 191, 6, + { 0x2248, 0x060d }, // 13 , 6, + { 0x2249, 0x06c0 }, // 192, 6, + { 0x224D, 0x06b3 }, // 179, 6, + { 0x224E, 0x06b2 }, // 178, 6, + { 0x2250, 0x06af }, // 175, 6, + { 0x2252, 0x06b0 }, // 176, 6, + { 0x2253, 0x06b1 }, // 177, 6, + { 0x225F, 0x06d9 }, // 217, 6, + { 0x2260, 0x0663 }, // 99 , 6, + { 0x2261, 0x060e }, // 14 , 6, + { 0x2262, 0x0664 }, // 100, 6, + { 0x2264, 0x0602 }, // 2 , 6, + { 0x2265, 0x0603 }, // 3 , 6, + { 0x226A, 0x064d }, // 77 , 6, + { 0x226B, 0x064e }, // 78 , 6, + { 0x226C, 0x06b6 }, // 182, 6, + { 0x226D, 0x06cf }, // 207, 6, + { 0x226E, 0x06b9 }, // 185, 6, + { 0x226F, 0x06bb }, // 187, 6, + { 0x2270, 0x06ba }, // 186, 6, + { 0x2271, 0x06bc }, // 188, 6, + { 0x2272, 0x06eb }, // 235, 6, + { 0x2273, 0x06ec }, // 236, 6, + { 0x227A, 0x0675 }, // 117, 6, + { 0x227B, 0x0677 }, // 119, 6, + { 0x227C, 0x0676 }, // 118, 6, + { 0x227D, 0x0678 }, // 120, 6, + { 0x2280, 0x06c1 }, // 193, 6, + { 0x2281, 0x06c3 }, // 195, 6, + { 0x2282, 0x0643 }, // 67 , 6, + { 0x2283, 0x0644 }, // 68 , 6, + { 0x2284, 0x06c5 }, // 197, 6, + { 0x2285, 0x06c6 }, // 198, 6, + { 0x2286, 0x0645 }, // 69 , 6, + { 0x2287, 0x0646 }, // 70 , 6, + { 0x2288, 0x06c7 }, // 199, 6, + { 0x2289, 0x06c8 }, // 200, 6, + { 0x228A, 0x067e }, // 126, 6, + { 0x228B, 0x067f }, // 127, 6, + { 0x228E, 0x067d }, // 125, 6, + { 0x228F, 0x0682 }, // 130, 6, + { 0x2290, 0x0685 }, // 133, 6, + { 0x2291, 0x0683 }, // 131, 6, + { 0x2292, 0x0686 }, // 134, 6, + { 0x2293, 0x0680 }, // 128, 6, + { 0x2294, 0x0681 }, // 129, 6, + { 0x2295, 0x0651 }, // 81 , 6, + { 0x2296, 0x0652 }, // 82 , 6, + { 0x2297, 0x0650 }, // 80 , 6, + { 0x2299, 0x0654 }, // 84 , 6, + { 0x229A, 0x06a4 }, // 164, 6, + { 0x229B, 0x06a5 }, // 165, 6, + { 0x229D, 0x06a6 }, // 166, 6, + { 
0x22A2, 0x065b }, // 91 , 6, + { 0x22A3, 0x065c }, // 92 , 6, + { 0x22A4, 0x0658 }, // 88 , 6, + { 0x22A5, 0x0659 }, // 89 , 6, + { 0x22A8, 0x06b4 }, // 180, 6, + { 0x22BB, 0x0657 }, // 87 , 6, + { 0x22C5, 0x061f }, // 31 , 6, + { 0x22C6, 0x0670 }, // 112, 6, + { 0x22C8, 0x068c }, // 140, 6, + { 0x22D0, 0x06a2 }, // 162, 6, + { 0x22D1, 0x06a3 }, // 163, 6, + { 0x22D2, 0x06a1 }, // 161, 6, + { 0x22D3, 0x06a0 }, // 160, 6, + { 0x22D8, 0x067b }, // 123, 6, + { 0x22D9, 0x067c }, // 124, 6, + { 0x22E0, 0x06c2 }, // 194, 6, + { 0x22E1, 0x06c4 }, // 196, 6, + { 0x22E2, 0x06cb }, // 203, 6, + { 0x22E3, 0x06cc }, // 204, 6, + { 0x22E4, 0x0684 }, // 132, 6, + { 0x22E5, 0x0687 }, // 135, 6, + { 0x22EE, 0x06de }, // 222, 6, + { 0x22EF, 0x06dc }, // 220, 6, + { 0x22F1, 0x06df }, // 223, 6, + { 0x2302, 0x050c }, // 12 , 5, + { 0x2308, 0x0649 }, // 73 , 6, + { 0x2309, 0x064a }, // 74 , 6, + { 0x230A, 0x064b }, // 75 , 6, + { 0x230B, 0x064c }, // 76 , 6, + { 0x2310, 0x0510 }, // 16 , 5, + { 0x2312, 0x065a }, // 90 , 6, + { 0x2319, 0x0511 }, // 17 , 5, + { 0x231A, 0x051f }, // 31 , 5, + { 0x231B, 0x0520 }, // 32 , 5, + { 0x2320, 0x0700 }, // 0 , 7, + { 0x2321, 0x0701 }, // 1 , 7, + { 0x2322, 0x068e }, // 142, 6, + { 0x2323, 0x068d }, // 141, 6, + { 0x2329, 0x060a }, // 10 , 6, + { 0x232A, 0x060b }, // 11 , 6, + { 0x2409, 0x044f }, // 79 , 4, + { 0x240A, 0x0452 }, // 82 , 4, + { 0x240B, 0x0454 }, // 84 , 4, + { 0x240C, 0x0450 }, // 80 , 4, + { 0x240D, 0x0451 }, // 81 , 4, + { 0x2424, 0x0453 }, // 83 , 4, + { 0x24C2, 0x0446 }, // 70 , 4, + { 0x24C5, 0x0447 }, // 71 , 4, + { 0x24CA, 0x0448 }, // 72 , 4, + { 0x2500, 0x0308 }, // 8 , 3, + { 0x2502, 0x0309 }, // 9 , 3, + { 0x250C, 0x030a }, // 10 , 3, + { 0x2510, 0x030b }, // 11 , 3, + { 0x2514, 0x030d }, // 13 , 3, + { 0x2518, 0x030c }, // 12 , 3, + { 0x251C, 0x030e }, // 14 , 3, + { 0x251E, 0x033e }, // 62 , 3, + { 0x251F, 0x033c }, // 60 , 3, + { 0x2521, 0x033f }, // 63 , 3, + { 0x2522, 0x033d }, // 61 , 3, + { 0x2524, 0x0310 }, // 16 
, 3, + { 0x2526, 0x0345 }, // 69 , 3, + { 0x2527, 0x0344 }, // 68 , 3, + { 0x2529, 0x0347 }, // 71 , 3, + { 0x252A, 0x0346 }, // 70 , 3, + { 0x252C, 0x030f }, // 15 , 3, + { 0x252D, 0x0342 }, // 66 , 3, + { 0x252E, 0x0340 }, // 64 , 3, + { 0x2531, 0x0343 }, // 67 , 3, + { 0x2532, 0x0341 }, // 65 , 3, + { 0x2534, 0x0311 }, // 17 , 3, + { 0x2535, 0x034a }, // 74 , 3, + { 0x2536, 0x0348 }, // 72 , 3, + { 0x2539, 0x034b }, // 75 , 3, + { 0x253A, 0x0349 }, // 73 , 3, + { 0x253C, 0x0312 }, // 18 , 3, + { 0x253D, 0x0352 }, // 82 , 3, + { 0x253E, 0x034e }, // 78 , 3, + { 0x2540, 0x034f }, // 79 , 3, + { 0x2541, 0x034c }, // 76 , 3, + { 0x2543, 0x0355 }, // 85 , 3, + { 0x2544, 0x0350 }, // 80 , 3, + { 0x2545, 0x0353 }, // 83 , 3, + { 0x2546, 0x034d }, // 77 , 3, + { 0x2547, 0x0357 }, // 87 , 3, + { 0x2548, 0x0354 }, // 84 , 3, + { 0x2549, 0x0356 }, // 86 , 3, + { 0x254A, 0x0351 }, // 81 , 3, + { 0x2550, 0x0313 }, // 19 , 3, + { 0x2551, 0x0314 }, // 20 , 3, + { 0x2552, 0x031e }, // 30 , 3, + { 0x2553, 0x0322 }, // 34 , 3, + { 0x2554, 0x0315 }, // 21 , 3, + { 0x2555, 0x031f }, // 31 , 3, + { 0x2556, 0x0323 }, // 35 , 3, + { 0x2557, 0x0316 }, // 22 , 3, + { 0x2558, 0x0321 }, // 33 , 3, + { 0x2559, 0x0325 }, // 37 , 3, + { 0x255A, 0x0318 }, // 24 , 3, + { 0x255B, 0x0320 }, // 32 , 3, + { 0x255C, 0x0324 }, // 36 , 3, + { 0x255D, 0x0317 }, // 23 , 3, + { 0x255E, 0x0326 }, // 38 , 3, + { 0x255F, 0x032a }, // 42 , 3, + { 0x2560, 0x0319 }, // 25 , 3, + { 0x2561, 0x0328 }, // 40 , 3, + { 0x2562, 0x032c }, // 44 , 3, + { 0x2563, 0x031b }, // 27 , 3, + { 0x2564, 0x032b }, // 43 , 3, + { 0x2565, 0x0327 }, // 39 , 3, + { 0x2566, 0x031a }, // 26 , 3, + { 0x2567, 0x032d }, // 45 , 3, + { 0x2568, 0x0329 }, // 41 , 3, + { 0x2569, 0x031c }, // 28 , 3, + { 0x256A, 0x032f }, // 47 , 3, + { 0x256B, 0x032e }, // 46 , 3, + { 0x256C, 0x031d }, // 29 , 3, + { 0x2574, 0x0330 }, // 48 , 3, + { 0x2575, 0x0331 }, // 49 , 3, + { 0x2576, 0x0332 }, // 50 , 3, + { 0x2577, 0x0333 }, // 51 , 3, + { 0x2578, 
0x0334 }, // 52 , 3, + { 0x2579, 0x0335 }, // 53 , 3, + { 0x257A, 0x0336 }, // 54 , 3, + { 0x257B, 0x0337 }, // 55 , 3, + { 0x257C, 0x0338 }, // 56 , 3, + { 0x257D, 0x033a }, // 58 , 3, + { 0x257E, 0x0339 }, // 57 , 3, + { 0x257F, 0x033b }, // 59 , 3, + { 0x2580, 0x0305 }, // 5 , 3, + { 0x2584, 0x0307 }, // 7 , 3, + { 0x2588, 0x0303 }, // 3 , 3, + { 0x258C, 0x0304 }, // 4 , 3, + { 0x2590, 0x0306 }, // 6 , 3, + { 0x2591, 0x0300 }, // 0 , 3, + { 0x2592, 0x0301 }, // 1 , 3, + { 0x2593, 0x0302 }, // 2 , 3, + { 0x25A0, 0x0402 }, // 2 , 4, + { 0x25A1, 0x0426 }, // 38 , 4, + { 0x25AA, 0x042f }, // 47 , 4, + { 0x25AB, 0x0431 }, // 49 , 4, + { 0x25AC, 0x050b }, // 11 , 5, + { 0x25B2, 0x0573 }, // 115, 5, + { 0x25B3, 0x0688 }, // 136, 6, + { 0x25B4, 0x061d }, // 29 , 6, + { 0x25B5, 0x06ac }, // 172, 6, + { 0x25B8, 0x061b }, // 27 , 6, + { 0x25B9, 0x068b }, // 139, 6, + { 0x25BC, 0x0574 }, // 116, 5, + { 0x25BD, 0x0689 }, // 137, 6, + { 0x25BE, 0x061e }, // 30 , 6, + { 0x25BF, 0x06ad }, // 173, 6, + { 0x25C2, 0x061c }, // 28 , 6, + { 0x25C3, 0x068a }, // 138, 6, + { 0x25C6, 0x0575 }, // 117, 5, + { 0x25C7, 0x066f }, // 111, 6, + { 0x25CA, 0x065f }, // 95 , 6, + { 0x25CB, 0x0401 }, // 1 , 4, + { 0x25CF, 0x0400 }, // 0 , 4, + { 0x25D6, 0x059e }, // 158, 5, + { 0x25D7, 0x0577 }, // 119, 5, + { 0x25D8, 0x0512 }, // 18 , 5, + { 0x25D9, 0x0513 }, // 19 , 5, + { 0x25E6, 0x042d }, // 45 , 4, + { 0x2605, 0x0548 }, // 72, 5, + { 0x260E, 0x051e }, // 30 , 5, + { 0x2610, 0x0518 }, // 24 , 5, + { 0x2612, 0x0519 }, // 25 , 5, + { 0x261B, 0x052a }, // 42 , 5, + { 0x261C, 0x0516 }, // 22 , 5, + { 0x261E, 0x052b }, // 43 , 5, + { 0x2639, 0x051a }, // 26 , 5, + { 0x263A, 0x0507 }, // 7 , 5, + { 0x263B, 0x0508 }, // 8 , 5, + { 0x263C, 0x0506 }, // 6 , 5, + { 0x2640, 0x0505 }, // 5 , 5, + { 0x2642, 0x0504 }, // 4 , 5, + { 0x2660, 0x05ab }, // 171, 5, + { 0x2661, 0x0500 }, // 0 , 5, + { 0x2662, 0x0501 }, // 1 , 5, + { 0x2663, 0x05a8 }, // 168, 5, + { 0x2664, 0x0503 }, // 3 , 5, + { 0x2665, 0x05aa 
}, // 170, 5, + { 0x2666, 0x05a9 }, // 169, 5, + { 0x2667, 0x0502 }, // 2 , 5, + { 0x266A, 0x0509 }, // 9 , 5, + { 0x266C, 0x050a }, // 10 , 5, + { 0x266D, 0x051c }, // 28 , 5, + { 0x266E, 0x051d }, // 29 , 5, + { 0x266F, 0x051b }, // 27 , 5, + { 0x2701, 0x0521 }, // 33 , 5, + { 0x2702, 0x0522 }, // 34 , 5, + { 0x2703, 0x0523 }, // 35 , 5, + { 0x2704, 0x0524 }, // 36 , 5, + { 0x2706, 0x0526 }, // 38 , 5, + { 0x2707, 0x0527 }, // 39 , 5, + { 0x2708, 0x0528 }, // 40 , 5, + { 0x2709, 0x0529 }, // 41 , 5, + { 0x270C, 0x052c }, // 44 , 5, + { 0x270D, 0x052d }, // 45 , 5, + { 0x270E, 0x052e }, // 46 , 5, + { 0x270F, 0x052f }, // 47 , 5, + { 0x2710, 0x0530 }, // 48 , 5, + { 0x2711, 0x0531 }, // 49 , 5, + { 0x2712, 0x0532 }, // 50 , 5, + { 0x2713, 0x0533 }, // 51 , 5, + { 0x2714, 0x0534 }, // 52 , 5, + { 0x2715, 0x0535 }, // 53 , 5, + { 0x2716, 0x0536 }, // 54 , 5, + { 0x2717, 0x0537 }, // 55 , 5, + { 0x2718, 0x0538 }, // 56 , 5, + { 0x2719, 0x0539 }, // 57 , 5, + { 0x271A, 0x053a }, // 58 , 5, + { 0x271B, 0x053b }, // 59 , 5, + { 0x271C, 0x053c }, // 60 , 5, + { 0x271D, 0x053d }, // 61 , 5, + { 0x271E, 0x053e }, // 62 , 5, + { 0x271F, 0x053f }, // 63 , 5, + { 0x2720, 0x0540 }, // 64 , 5, + { 0x2721, 0x0541 }, // 65 , 5, + { 0x2722, 0x0542 }, // 66 , 5, + { 0x2723, 0x0543 }, // 67 , 5, + { 0x2724, 0x0544 }, // 68 , 5, + { 0x2725, 0x0545 }, // 69 , 5, + { 0x2726, 0x0546 }, // 70 , 5, + { 0x2727, 0x0547 }, // 71 , 5, + { 0x2729, 0x0549 }, // 73 , 5, + { 0x272A, 0x054a }, // 74 , 5, + { 0x272B, 0x054b }, // 75 , 5, + { 0x272C, 0x054c }, // 76 , 5, + { 0x272D, 0x054d }, // 77 , 5, + { 0x272E, 0x054e }, // 78 , 5, + { 0x272F, 0x054f }, // 79 , 5, + { 0x2730, 0x0550 }, // 80 , 5, + { 0x2731, 0x0551 }, // 81 , 5, + { 0x2732, 0x0552 }, // 82 , 5, + { 0x2733, 0x0553 }, // 83 , 5, + { 0x2734, 0x0554 }, // 84 , 5, + { 0x2735, 0x0555 }, // 85 , 5, + { 0x2736, 0x0556 }, // 86 , 5, + { 0x2737, 0x0557 }, // 87 , 5, + { 0x2738, 0x0558 }, // 88 , 5, + { 0x2739, 0x0559 }, // 89 , 5, + { 
0x273A, 0x055a }, // 90 , 5, + { 0x273B, 0x055b }, // 91 , 5, + { 0x273C, 0x055c }, // 92 , 5, + { 0x273D, 0x055d }, // 93 , 5, + { 0x273E, 0x055e }, // 94 , 5, + { 0x273F, 0x055f }, // 95 , 5, + { 0x2740, 0x0560 }, // 96 , 5, + { 0x2741, 0x0561 }, // 97 , 5, + { 0x2742, 0x0562 }, // 98 , 5, + { 0x2743, 0x0563 }, // 99 , 5, + { 0x2744, 0x0564 }, // 100, 5, + { 0x2745, 0x0565 }, // 101, 5, + { 0x2746, 0x0566 }, // 102, 5, + { 0x2747, 0x0567 }, // 103, 5, + { 0x2748, 0x0568 }, // 104, 5, + { 0x2749, 0x0569 }, // 105, 5, + { 0x274A, 0x056a }, // 106, 5, + { 0x274B, 0x056b }, // 107, 5, + { 0x274D, 0x056d }, // 109, 5, + { 0x274F, 0x056f }, // 111, 5, + { 0x2750, 0x0570 }, // 112, 5, + { 0x2751, 0x0571 }, // 113, 5, + { 0x2752, 0x0572 }, // 114, 5, + { 0x2756, 0x0576 }, // 118, 5, + { 0x2758, 0x0578 }, // 120, 5, + { 0x2759, 0x0579 }, // 121, 5, + { 0x275A, 0x057a }, // 122, 5, + { 0x275B, 0x057b }, // 123, 5, + { 0x275C, 0x057c }, // 124, 5, + { 0x275D, 0x057d }, // 125, 5, + { 0x275E, 0x057e }, // 126, 5, + { 0x2761, 0x05a1 }, // 161, 5, + { 0x2762, 0x05a2 }, // 162, 5, + { 0x2763, 0x05a3 }, // 163, 5, + { 0x2764, 0x05a4 }, // 164, 5, + { 0x2765, 0x05a5 }, // 165, 5, + { 0x2766, 0x05a6 }, // 166, 5, + { 0x2767, 0x05a7 }, // 167, 5, + { 0x2776, 0x05b6 }, // 182, 5, + { 0x2777, 0x05b7 }, // 183, 5, + { 0x2778, 0x05b8 }, // 184, 5, + { 0x2779, 0x05b9 }, // 185, 5, + { 0x277A, 0x05ba }, // 186, 5, + { 0x277B, 0x05bb }, // 187, 5, + { 0x277C, 0x05bc }, // 188, 5, + { 0x277D, 0x05bd }, // 189, 5, + { 0x277E, 0x05be }, // 190, 5, + { 0x277F, 0x05bf }, // 191, 5, + { 0x2780, 0x05c0 }, // 192, 5, + { 0x2781, 0x05c1 }, // 193, 5, + { 0x2782, 0x05c2 }, // 194, 5, + { 0x2783, 0x05c3 }, // 195, 5, + { 0x2784, 0x05c4 }, // 196, 5, + { 0x2785, 0x05c5 }, // 197, 5, + { 0x2786, 0x05c6 }, // 198, 5, + { 0x2787, 0x05c7 }, // 199, 5, + { 0x2788, 0x05c8 }, // 200, 5, + { 0x2789, 0x05c9 }, // 201, 5, + { 0x278A, 0x05ca }, // 202, 5, + { 0x278B, 0x05cb }, // 203, 5, + { 0x278C, 0x05cc }, 
// 204, 5, + { 0x278D, 0x05cd }, // 205, 5, + { 0x278E, 0x05ce }, // 206, 5, + { 0x278F, 0x05cf }, // 207, 5, + { 0x2790, 0x05d0 }, // 208, 5, + { 0x2791, 0x05d1 }, // 209, 5, + { 0x2792, 0x05d2 }, // 210, 5, + { 0x2793, 0x05d3 }, // 211, 5, + { 0x2794, 0x05d4 }, // 212, 5, + { 0x2798, 0x05d8 }, // 216, 5, + { 0x2799, 0x05d9 }, // 217, 5, + { 0x279A, 0x05da }, // 218, 5, + { 0x279B, 0x05db }, // 219, 5, + { 0x279C, 0x05dc }, // 220, 5, + { 0x279D, 0x05dd }, // 221, 5, + { 0x279E, 0x05de }, // 222, 5, + { 0x279F, 0x05df }, // 223, 5, + { 0x27A0, 0x05e0 }, // 224, 5, + { 0x27A1, 0x05e1 }, // 225, 5, + { 0x27A2, 0x05e2 }, // 226, 5, + { 0x27A3, 0x05e3 }, // 227, 5, + { 0x27A4, 0x05e4 }, // 228, 5, + { 0x27A5, 0x05e5 }, // 229, 5, + { 0x27A6, 0x05e6 }, // 230, 5, + { 0x27A7, 0x05e7 }, // 231, 5, + { 0x27A8, 0x05e8 }, // 232, 5, + { 0x27A9, 0x05e9 }, // 233, 5, + { 0x27AA, 0x05ea }, // 234, 5, + { 0x27AB, 0x05eb }, // 235, 5, + { 0x27AC, 0x05ec }, // 236, 5, + { 0x27AD, 0x05ed }, // 237, 5, + { 0x27AE, 0x05ee }, // 238, 5, + { 0x27AF, 0x05ef }, // 239, 5, + { 0x27B1, 0x05f1 }, // 241, 5, + { 0x27B2, 0x05f2 }, // 242, 5, + { 0x27B3, 0x05f3 }, // 243, 5, + { 0x27B4, 0x05f4 }, // 244, 5, + { 0x27B5, 0x05f5 }, // 245, 5, + { 0x27B6, 0x05f6 }, // 246, 5, + { 0x27B7, 0x05f7 }, // 247, 5, + { 0x27B8, 0x05f8 }, // 248, 5, + { 0x27B9, 0x05f9 }, // 249, 5, + { 0x27BA, 0x05fa }, // 250, 5, + { 0x27BB, 0x05fb }, // 251, 5, + { 0x27BC, 0x05fc }, // 252, 5, + { 0x27BD, 0x05fd }, // 253, 5, + { 0x27BE, 0x05fe }, // 254, 5, + { 0xFB00, 0x0433 }, // 51 , 4, + { 0xFB01, 0x0436 }, // 54 , 4, + { 0xFB02, 0x0437 }, // 55 , 4, + { 0xFB03, 0x0434 }, // 52 , 4, + { 0xFB04, 0x0435 }, // 53 , 4, + { 0xFB1E, 0x0930 }, // 48 , 9, + { 0xFF61, 0x0b00 }, // 0 , 11, + { 0xFF62, 0x0b01 }, // 1 , 11, + { 0xFF63, 0x0b02 }, // 2 , 11, + { 0xFF64, 0x0b03 }, // 3 , 11, + { 0xFF65, 0x0b04 }, // 4 , 11, + { 0xFF66, 0x0b05 }, // 5 , 11, + { 0xFF67, 0x0b06 }, // 6 , 11, + { 0xFF68, 0x0b07 }, // 7 , 11, + { 
0xFF69, 0x0b08 }, // 8 , 11, + { 0xFF6A, 0x0b09 }, // 9 , 11, + { 0xFF6B, 0x0b0a }, // 10 , 11, + { 0xFF6C, 0x0b0b }, // 11 , 11, + { 0xFF6D, 0x0b0c }, // 12 , 11, + { 0xFF6E, 0x0b0d }, // 13 , 11, + { 0xFF6F, 0x0b0e }, // 14 , 11, + { 0xFF70, 0x0b0f }, // 15 , 11, + { 0xFF71, 0x0b10 }, // 16 , 11, + { 0xFF72, 0x0b11 }, // 17 , 11, + { 0xFF73, 0x0b12 }, // 18 , 11, + { 0xFF74, 0x0b13 }, // 19 , 11, + { 0xFF75, 0x0b14 }, // 20 , 11, + { 0xFF76, 0x0b15 }, // 21 , 11, + { 0xFF77, 0x0b16 }, // 22 , 11, + { 0xFF78, 0x0b17 }, // 23 , 11, + { 0xFF79, 0x0b18 }, // 24 , 11, + { 0xFF7A, 0x0b19 }, // 25 , 11, + { 0xFF7B, 0x0b1a }, // 26 , 11, + { 0xFF7C, 0x0b1b }, // 27 , 11, + { 0xFF7D, 0x0b1c }, // 28 , 11, + { 0xFF7E, 0x0b1d }, // 29 , 11, + { 0xFF7F, 0x0b1e }, // 30 , 11, + { 0xFF80, 0x0b1f }, // 31 , 11, + { 0xFF81, 0x0b20 }, // 32 , 11, + { 0xFF82, 0x0b21 }, // 33 , 11, + { 0xFF83, 0x0b22 }, // 34 , 11, + { 0xFF84, 0x0b23 }, // 35 , 11, + { 0xFF85, 0x0b24 }, // 36 , 11, + { 0xFF86, 0x0b25 }, // 37 , 11, + { 0xFF87, 0x0b26 }, // 38 , 11, + { 0xFF88, 0x0b27 }, // 39 , 11, + { 0xFF89, 0x0b28 }, // 40 , 11, + { 0xFF8A, 0x0b29 }, // 41 , 11, + { 0xFF8B, 0x0b2a }, // 42 , 11, + { 0xFF8C, 0x0b2b }, // 43 , 11, + { 0xFF8D, 0x0b2c }, // 44 , 11, + { 0xFF8E, 0x0b2d }, // 45 , 11, + { 0xFF8F, 0x0b2e }, // 46 , 11, + { 0xFF90, 0x0b2f }, // 47 , 11, + { 0xFF91, 0x0b30 }, // 48 , 11, + { 0xFF92, 0x0b31 }, // 49 , 11, + { 0xFF93, 0x0b32 }, // 50 , 11, + { 0xFF94, 0x0b33 }, // 51 , 11, + { 0xFF95, 0x0b34 }, // 52 , 11, + { 0xFF96, 0x0b35 }, // 53 , 11, + { 0xFF97, 0x0b36 }, // 54 , 11, + { 0xFF98, 0x0b37 }, // 55 , 11, + { 0xFF99, 0x0b38 }, // 56 , 11, + { 0xFF9A, 0x0b39 }, // 57 , 11, + { 0xFF9B, 0x0b3a }, // 58 , 11, + { 0xFF9C, 0x0b3b }, // 59 , 11, + { 0xFF9D, 0x0b3c }, // 60 , 11, + { 0xFF9E, 0x0b3d }, // 61 , 11, + { 0xFF9F, 0x0b3e } // 62 , 11 +}; + +/**************************************************************************** +Desc: 
+****************************************************************************/ +typedef struct BASE_DIACRIT_TABLE +{ + FLMBYTE base; + FLMBYTE diacrit; +} BASE_DIACRIT_TABLE; + +/**************************************************************************** +Desc: +****************************************************************************/ +typedef struct BASE_DIACRIT +{ + FLMUINT16 char_count; // Number of characters in table + FLMUINT16 start_char; // Start char + BASE_DIACRIT_TABLE * table; +} BASE_DIACRIT; + +/**************************************************************************** +Desc: +****************************************************************************/ +FSTATIC BASE_DIACRIT_TABLE flm_ml1c_table[] = +{ + { 'A', acute }, + { 'a', acute }, + { 'A', circum }, + { 'a', circum }, + { 'A', umlaut }, + { 'a', umlaut }, + { 'A', grave }, + { 'a', grave }, + { 'A', ring }, + { 'a', ring }, + { 0xff, 0xff }, + { 0xff, 0xff }, + { 'C', cedilla }, + { 'c', cedilla }, + { 'E', acute }, + { 'e',acute }, + { 'E',circum }, + { 'e',circum }, + { 'E',umlaut }, + { 'e',umlaut }, + { 'E',grave }, + { 'e',grave }, + { 'I',acute }, + { dotlesi,acute }, + { 'I',circum }, + { dotlesi,circum }, + { 'I',umlaut }, + { dotlesi,umlaut }, + { 'I',grave }, + { dotlesi,grave }, + { 'N',tilde }, + { 'n',tilde }, + { 'O',acute }, + { 'o',acute }, + { 'O',circum }, + { 'o',circum }, + { 'O',umlaut }, + { 'o',umlaut }, + { 'O',grave }, + { 'o',grave }, + { 'U',acute }, + { 'u',acute }, + { 'U',circum }, + { 'u',circum }, + { 'U',umlaut }, + { 'u',umlaut }, + { 'U',grave }, + { 'u',grave }, + { 'Y',umlaut }, + { 'y',umlaut }, + { 'A',tilde }, + { 'a',tilde }, + { 'D',crossb }, + { 'd',crossb }, + { 'O',slash }, + { 'o',slash }, + { 'O',tilde }, + { 'o',tilde }, + { 'Y',acute }, + { 'y',acute }, + { 0xff,0xff }, + { 0xff,0xff }, + { 0xff,0xff }, + { 0xff,0xff }, + { 'A',breve }, + { 'a',breve }, + { 'A',macron }, + { 'a',macron }, + { 'A',ogonek }, + { 'a',ogonek }, + { 'C',acute 
}, + { 'c',acute }, + { 'C',caron }, + { 'c',caron }, + { 'C',circum }, + { 'c',circum }, + { 'C',dota }, + { 'c',dota }, + { 'D',caron }, + { 'd',caron }, + { 'E',caron }, + { 'e',caron }, + { 'E',dota }, + { 'e',dota }, + { 'E',macron }, + { 'e',macron }, + { 'E',ogonek }, + { 'e',ogonek }, + { 'G',acute }, + { 'g',acute }, + { 'G',breve }, + { 'g',breve }, + { 'G',caron }, + { 'g',caron }, + { 'G',cedilla }, + { 'g',aposab }, + { 'G',circum }, + { 'g',circum }, + { 'G',dota }, + { 'g',dota }, + { 'H',circum }, + { 'h',circum }, + { 'H',crossb }, + { 'h',crossb }, + { 'I',dota }, + { dotlesi,dota }, + { 'I',macron }, + { dotlesi,macron }, + { 'I',ogonek }, + { 'i',ogonek }, + { 'I',tilde }, + { dotlesi,tilde }, + { 0xff,0xff }, + { 0xff,0xff }, + { 'J',circum }, + { dotlesj,circum }, + { 'K',cedilla }, + { 'k',cedilla }, + { 'L',acute }, + { 'l',acute }, + { 'L',caron }, + { 'l',caron }, + { 'L',cedilla }, + { 'l',cedilla }, + { 'L',centerd }, + { 'l',centerd }, + { 'L',stroke }, + { 'l',stroke }, + { 'N',acute }, + { 'n',acute }, + { 'N',aposba }, + { 'n',aposba }, + { 'N',caron }, + { 'n',caron }, + { 'N',cedilla }, + { 'n',cedilla }, + { 'O',dacute }, + { 'o',dacute }, + { 'O',macron }, + { 'o',macron }, + { 0xff,0xff }, + { 0xff,0xff }, + { 'R',acute }, + { 'r',acute }, + { 'R',caron }, + { 'r',caron }, + { 'R',cedilla }, + { 'r',cedilla }, + { 'S',acute }, + { 's',acute }, + { 'S',caron }, + { 's',caron }, + { 'S',cedilla }, + { 's',cedilla }, + { 'S',circum }, + { 's',circum }, + { 'T',caron }, + { 't',caron }, + { 'T',cedilla }, + { 't',cedilla }, + { 'T',crossb }, + { 't',crossb }, + { 'U',breve }, + { 'u',breve }, + { 'U',dacute }, + { 'u',dacute }, + { 'U',macron }, + { 'u',macron }, + { 'U',ogonek }, + { 'u',ogonek }, + { 'U',ring }, + { 'u',ring }, + { 'U',tilde }, + { 'u',tilde }, + { 'W',circum }, + { 'w',circum }, + { 'Y',circum }, + { 'y',circum }, + { 'Z',acute }, + { 'z',acute }, + { 'Z',caron }, + { 'z',caron }, + { 'Z',dota }, + { 'z',dota }, 
+ { 0xff,0xff }, + { 0xff,0xff }, + { 'D',macron }, + {'d',macron }, + {'L',macron }, + {'l',macron }, + {'N',macron }, + {'n',macron }, + {'R',grave }, + {'r',grave }, + {'S',macron }, + {'s',macron }, + {'T',macron }, + {'t',macron }, + {'Y',breve }, + {'y',breve }, + {'Y',grave }, + {'y',grave }, + {'D',aposbes }, + {'d',aposbes }, + {'O',aposbes }, + {'o',aposbes }, + {'U',aposbes }, + {'u',aposbes }, + {'E',breve }, + {'e',breve }, + {'I',breve }, + {dotlesi,breve }, + {0xff,0xff }, + {0xff,0xff }, + {'O',breve }, + {'o',breve } +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FSTATIC BASE_DIACRIT flm_ml1c = +{ + 216, // Number of characters in table + 26, // Start char + flm_ml1c_table, +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FSTATIC BASE_DIACRIT_TABLE flm_grk_c_table[] = +{ + { 0, ghprime }, // ALPHA High Prime + { 1, gacute }, // alpha acute + { 10, ghprime }, // EPSILON High Prime + { 11, gacute }, // epsilon Acute + { 14, ghprime }, // ETA High Prime + { 15, gacute }, // eta Acute + { 18, ghprime }, // IOTA High Prime + { 19, gacute }, // iota Acute + { 0xFF, 0xFF }, // IOTA Diaeresis + { 19, gdia }, // iota Diaeresis + { 30, ghprime }, // OMICRON High Prime + { 31, gacute }, // omicron Acute + { 42, ghprime }, // UPSILON High Prime + { 43, gacute }, // upsilon Acute + { 0xFF, 0xFF }, // UPSILON Diaeresis + { 43,gdia }, // upsilon Diaeresis + { 50,ghprime }, // OMEGA High Prime + { 51,gacute }, // omega Acute + { 0xFF, 0xFF }, // epsilon (Variant) + { 0xFF, 0xFF }, // theta (Variant) + { 0xFF, 0xFF }, // kappa (Variant) + { 0xFF, 0xFF }, // pi (Variant) + { 0xFF, 0xFF }, // rho (Variant) + { 0xFF, 0xFF }, // sigma (Variant) + { 0xFF, 0xFF }, // UPSILON (Variant) + { 0xFF, 0xFF }, // phi (Variant) + { 
0xFF, 0xFF }, // omega (Variant) + { 0xFF, 0xFF }, // Greek Question Mark + { 0xFF, 0xFF }, // Greek Semicolon + { 0xFF, 0xFF }, // High Prime + { 0xFF, 0xFF }, // Low Prime + { 0xFF, 0xFF }, // Acute (Greek) + { 0xFF, 0xFF }, // Diaeresis (Greek) + { gacute,gdia }, // Acute Diaeresis + { ggrave, gdia }, // Grave Diaeresis + { 0xFF, 0xFF }, // Grave (Greek) + { 0xFF, 0xFF }, // Circumflex (Greek) + { 0xFF, 0xFF }, // Smooth Breathing + { 0xFF, 0xFF }, // Rough Breathing + { 0xFF, 0xFF }, // Iota Subscript + { gsmooth, gacute }, // Smooth Breathing Acute + { grough, gacute }, // Rough Breathing Acute + { gsmooth, ggrave }, // Smooth Breathing Grave + { grough, ggrave }, // Rough Breathing Grave + { gsmooth, gcircm }, // Smooth Breathing Circumflex + { grough, gcircm }, // Rough Breathing Circumflex + { gacute, giota }, // Acute w/Iota Subscript + { ggrave, giota }, // Grave w/Iota Subscript + { gcircm, giota }, // Circumflex w/Iota Subscript + { gsmooth, giota }, // Smooth Breathing w/Iota Subscript + { grough, giota }, // Rough Breathing w/Iota Subscript + { gsmact, giota }, // Smooth Breathing Acute w/Iota Subscript + { grgact, giota }, // Rough Breathing Acute w/Iota Subscript + { gsmgrv, giota }, // Smooth Breathing Grave w/Iota Subscript + { grggrv, giota }, // Rough Breathing Grave w/Iota Subscript + { gsmcir, giota }, // Smooth Breathing Circumflex w/Iota Sub + { grgcir, giota }, // Rough Breathing Circumflex w/Iota Sub + { 1, ggrave }, // alpha Grave + { 1, gcircm }, // alpha Circumflex + { 1, giota }, // alpha w/Iota + { 1, gactio }, // alpha Acute w/Iota + { 1, ggrvio }, // alpha Grave w/Iota + { 1, gcirio }, // alpha Circumflex w/Iota + { 1, gsmooth }, // alpha Smooth + { 1, gsmact }, // alpha Smooth Acute + { 1, gsmgrv }, // alpha Smooth Grave + { 1, gsmcir }, // alpha Smooth Circumflex + { 1, gsmio }, // alpha Smooth w/Iota + { 1, gsmaio }, // alpha Smooth Acute w/Iota + { 1, gsmgvio }, // alpha Smooth Grave w/Iota + { 1, gsmcio }, // alpha Smooth 
Circumflex w/Iota + { 1, grough }, // alpha Rough + { 1, grgact }, // alpha Rough Acute + { 1, grggrv }, // alpha Rough Grave + { 1, grgcir }, // alpha Rough Circumflex + { 1, grgio }, // alpha Rough w/Iota + { 1, grgaio }, // alpha Rough Acute w/Iota + { 1, grggvio }, // alpha Rough Grave w/Iota + { 1, grgcio }, // alpha Rough Circumflex w/Iota + { 11, ggrave }, // epsilon Grave + { 11, gsmooth }, // epsilon Smooth + { 11, gsmact }, // epsilon Smooth Acute + { 11, gsmgrv }, // epsilon Smooth Grave + { 11, grough }, // epsilon Rough + { 11, grgact }, // epsilon Rough Acute + { 11, grggrv }, // epsilon Rough Grave + { 15, ggrave }, // eta Grave + { 15, gcircm }, // eta Circumflex + { 15, giota }, // eta w/Iota + { 15, gactio }, // eta Acute w/Iota + { 15, ggrvio }, // eta Grave w/Iota + { 15, gcirio }, // eta Circumflex w/Iota + { 15, gsmooth }, // eta Smooth + { 15, gsmact }, // eta Smooth Acute + { 15, gsmgrv }, // eta Smooth Grave + { 15, gsmcir }, // eta Smooth Circumflex + { 15, gsmio }, // eta Smooth w/Iota + { 15, gsmaio }, // eta Smooth Acute w/Iota + { 15, gsmgvio }, // eta Smooth Grave w/Iota + { 15, gsmcio }, // eta Smooth Circumflex w/Iota + { 15, grough }, // eta Rough + { 15, grgact }, // eta Rough Acute + { 15, grggrv }, // eta Rough Grave + { 15, grgcir }, // eta Rough Circumflex + { 15, grgio }, // eta Rough w/Iota + { 15, grgaio }, // eta Rough Acute w/Iota + { 15, grggvio }, // eta Rough Grave w/Iota + { 15, grgcio }, // eta Rough Circumflex w/Iota + { 19, ggrave }, // iota Grave + { 19, gcircm }, // iota Circumflex + { 19, gactdia }, // iota Acute Diaeresis + { 19, ggrvdia }, // iota Grave Diaeresis + { 19, gsmooth }, // iota Smooth + { 19, gsmact }, // iota Smooth Acute + { 19, gsmgrv }, // iota Smooth Grave + { 19, gsmcir }, // iota Smooth Circumflex + { 19, grough }, // iota Rough + { 19, grgact }, // iota Rough Acute + { 19, grggrv }, // iota Rough Grave + { 19, grgcir }, // iota Rough Circumflex + { 31, ggrave }, // omicron Grave + { 31, 
gsmooth }, // omicron Smooth + { 31, gsmact }, // omicron Smooth Acute + { 31, gsmgrv }, // omicron Smooth Grave + { 31, grough }, // omicron Rough + { 31, grgact }, // omicron Rough Acute + { 31, grggrv }, // omicron Rough Grave + { 0xFF, 0xFF }, // rho rough + { 0xFF, 0xFF }, // rho smooth + { 43, ggrave }, // upsilon Grave + { 43, gcircm }, // upsilon Circumflex + { 43, gactdia }, // upsilon Acute Diaeresis + { 43, ggrvdia }, // upsilon Grave Diaeresis + { 43, gsmooth }, // upsilon Smooth + { 43, gsmact }, // upsilon Smooth Acute + { 43, gsmgrv }, // upsilon Smooth Grave + { 43, gsmcir }, // upsilon Smooth Circumflex + { 43, grough }, // upsilon Rough + { 43, grgact }, // upsilon Rough Acute + { 43, grggrv }, // upsilon Rough Grave + { 43, grgcir }, // upsilon Rough Circumflex + { 51, ggrave }, // omega Grave + { 51, gcircm }, // omega Circumflex + { 51, giota }, // omega w/Iota + { 51, gactio }, // omega Acute w/Iota + { 51, ggrvio }, // omega Grave w/Iota + { 51, gcirio }, // omega Circumflex w/Iota + { 51, gsmooth }, // omega Smooth + { 51, gsmact }, // omega Smooth Acute + { 51, gsmgrv }, // omega Smooth Grave + { 51, gsmcir }, // omega Smooth Circumflex + { 51, gsmio }, // omega Smooth w/Iota + { 51, gsmaio }, // omega Smooth Acute w/Iota + { 51, gsmgvio }, // omega Smooth Grave w/Iota + { 51, gsmcio }, // omega Smooth Circumflex w/Iota + { 51, grough }, // omega Rough + { 51, grgact }, // omega Rough Acute + { 51, grggrv }, // omega Rough Grave + { 51, grgcir }, // omega Rough Circumflex + { 51, grgio }, // omega Rough w/Iota + { 51, grgaio }, // omega Rough Acute w/Iota + { 51, grggvio }, // omega Rough Grave w/Iota + { 51, grgcio} // omega Rough Circumflex w/Iota +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FSTATIC BASE_DIACRIT flm_grk_c = +{ + 163, // Number of characters in table + 52, // Start char + flm_grk_c_table +}; + 
+/**************************************************************************** +Desc: +****************************************************************************/ +FSTATIC BASE_DIACRIT_TABLE flm_rus_c_table[] = +{ + { 14, 204 }, // ZHE with right descender + { 15, 204 }, // zhe with right descender + { 0xFF, 0xFF}, // DZE + { 0xFF, 0xFF}, // dze + { 0xFF, 0xFF}, // Z + { 0xFF, 0xFF}, // z + { 18, 206 }, // II with macron + { 19, 206}, // ii with macron + { 0xFF, 0xFF}, // I + { 0xFF, 0xFF}, // i + { 0xFF, 0xFF}, // YI + { 0xFF, 0xFF}, // yi + { 0xFF, 0xFF}, // I ligature + { 0xFF, 0xFF}, // i ligature + { 0xFF, 0xFF}, // JE + { 0xFF, 0xFF}, // je + { 0xFF, 0xFF}, // KJE + { 0xFF, 0xFF}, // kje + { 22, 204}, // KA with right descender + { 23, 204}, // ka with right descender + { 22, 205 }, // KA ogonek + { 23, 205 }, // ka ogonek + { 0xFF, 0xFF}, // KA vertical bar + { 0xFF, 0xFF}, // ka vertical bar + { 0xFF, 0xFF}, // LJE + { 0xFF, 0xFF}, // lje + { 28, 204 }, // EN with right descender + { 29, 204 }, // en with right descender + { 0xFF, 0xFF}, // NJE + { 0xFF, 0xFF}, // nje + { 0xFF, 0xFF}, // ROUND OMEGA + { 0xFF, 0xFF}, // round omega + { 0xFF, 0xFF}, // OMEGA + { 0xFF, 0xFF}, // omega + { 0xFF, 0xFF}, // TSHE + { 0xFF, 0xFF}, // tshe + { 0xFF, 0xFF}, // SHORT U + { 0xFF, 0xFF}, // short u + { 40, 206}, // U with macron + { 41, 206 }, // u with macron + { 0xFF, 0xFF}, // STRAIGHT U + { 0xFF, 0xFF}, // straight u + { 0xFF, 0xFF}, // STRAIGHT U BAR + { 0xFF, 0xFF}, // straight u bar + { 0xFF, 0xFF}, // OU ligature + { 0xFF, 0xFF}, // ou ligature + { 44, 204 }, // KHA with right descender + { 45, 204 }, // kha with right descender + { 44, 205 }, // KHA ogonek + { 45, 205 }, // kha ogonek + { 0xFF, 0xFF}, // H + { 0xFF, 0xFF}, // h + { 0xFF, 0xFF}, // OMEGA titlo + { 0xFF, 0xFF}, // omega titlo + { 0xFF, 0xFF}, // DZHE + { 0xFF, 0xFF}, // dzhe + { 48, 204 }, // CHE with right descender + { 49, 204 }, // che with right descender + { 0xFF, 0xFF}, // CHE vertical 
bar + { 0xFF, 0xFF}, // che vertical bar + { 0xFF, 0xFF}, // SHCHA (variant) + { 0xFF, 0xFF}, // shcha (variant) + { 0xFF, 0xFF}, // YAT + { 0xFF, 0xFF}, // yat + { 0xFF, 0xFF}, // YUS BOLSHOI + { 0xFF, 0xFF}, // yus bolshoi + { 0xFF, 0xFF}, // BIG MALYI + { 0xFF, 0xFF}, // big malyi + { 0xFF, 0xFF}, // KSI + { 0xFF, 0xFF}, // ksi + { 0xFF, 0xFF}, // PSI + { 0xFF, 0xFF}, // psi + { 0xFF, 0xFF}, // FITA + { 0xFF, 0xFF}, // fita + { 0xFF, 0xFF}, // IZHITSA + { 0xFF, 0xFF}, // izhitsa + { 00, racute}, // Russian A acute + { 01, racute }, // Russian a acute + { 10, racute }, // Russian IE acute + { 11, racute }, // Russian ie acute + { 78, racute }, // Russian E acute + { 79, racute }, // Russian e acute + { 18, racute }, // Russian II acute + { 19, racute }, // Russian ii acute + { 88, racute }, // Russian I acute + { 89, racute }, // Russian i acute + { 90, racute }, // Russian YI acute + { 91, racute }, // Russian yi acute + { 30, racute }, // Russian O acute + { 31, racute }, // Russian o acute + { 40, racute }, // Russian U acute + { 41, racute }, // Russian u acute + { 56, racute }, // Russian YERI acute + { 57, racute }, // Russian yeri acute + { 60, racute }, // Russian REVERSED E acute + { 61, racute }, // Russian reversed e acute + { 62, racute }, // Russian IU acute + { 63, racute }, // Russian iu acute + { 64, racute }, // Russian IA acute + { 65, racute }, // Russian ia acute + { 00, rgrave }, // Russian A grave + { 01, rgrave }, // Russian a grave + { 10, rgrave }, // Russian IE grave + { 11, rgrave }, // Russian ie grave + { 12, rgrave }, // Russian YO grave + { 13, rgrave }, // Russian yo grave + { 18, rgrave }, // Russian I grave + { 19, rgrave }, // Russian i grave + { 30, rgrave }, // Russian O grave + { 31, rgrave }, // Russian o grave + { 40, rgrave }, // Russian U grave + { 41, rgrave }, // Russian u grave + { 56, rgrave }, // Russian YERI grave + { 57, rgrave }, // Russian yeri grave + { 60, rgrave }, // Russian REVERSED E grave + { 61, rgrave }, 
// Russian reversed e grave + { 62, rgrave }, // Russian IU grave + { 63, rgrave }, // Russian iu grave + { 64, rgrave }, // Russian IA grave + { 65, rgrave} // Russian ia grave +}; + +/**************************************************************************** +Desc: Cyrillic composed-character descriptor (entry count, start char, table) +****************************************************************************/ +FSTATIC BASE_DIACRIT flm_rus_c = { + 120, // Number of characters in table + 156, // Start char + flm_rus_c_table, +}; + +/**************************************************************************** +Desc: Composed-character descriptors, one slot per character set (null where no table is supplied) +****************************************************************************/ +FSTATIC BASE_DIACRIT * flm_car60_c[ F_NCHSETS] = +{ + (BASE_DIACRIT*)0, // no composed characters for ascii + &flm_ml1c, // Multinational 1 + (BASE_DIACRIT*)0, // no composed characters for multinational 2 + (BASE_DIACRIT*)0, // no composed characters for line draw + (BASE_DIACRIT*)0, // no composed characters for typographic + (BASE_DIACRIT*)0, // no composed characters for icons + (BASE_DIACRIT*)0, // no composed characters for math + (BASE_DIACRIT*)0, // no composed characters for math extension + &flm_grk_c, // Greek + (BASE_DIACRIT*)0, // Hebrew + &flm_rus_c, // Cyrillic - Russian + (BASE_DIACRIT*)0, // Hiragana or Katakana (Japanese) + (BASE_DIACRIT*)0, // no composed characters for user + (BASE_DIACRIT*)0, // no composed characters for Arabic + (BASE_DIACRIT*)0, // no composed characters for Arabic Script +}; + +/**************************************************************************** +Desc: Base character location bit mapped table.
+****************************************************************************/ +FSTATIC FLMBYTE flm_ml1_cb60[] = +{ + 0x00, // 0-7 + 0x00, // 8-15 + 0x00, // 16-23 + 0x00, // 24-31 + 0x00, // 32-39 + 0x00, // 40-47 + 0x55, // 48-55 + 0x00, // 56-63 + 0x00, // 64-71 + 0x00, // 72-79 + 0x00, // 80-87 + 0x00, // 88-95 + 0x00, // 96-103 + 0x00, // 104-111 + 0x00, // 112-119 + 0x00, // 120-127 + 0x14, // 128-135 + 0x44, // 136-143 + 0x00, // 144-151 + 0x00, // 152-159 + 0x00, // 160-167 + 0x00, // 168-175 + 0x00, // 176-183 + 0x00, // 184-191 + 0x00, // 192-199 + 0x00, // 200-207 + 0x00, // 208-215 + 0x00, // 216-223 + 0x00, // 224-231 + 0x04, // 232-239 + 0x00, // 240-241 +}; + +/**************************************************************************** +Desc: Collation table lengths and the base collating value of each character range +****************************************************************************/ + +#define ASCTBLLEN 95 +#define MNTBLLEN 219 +#define SYMTBLLEN 9 +#define GRKTBLLEN 219 +#define CYRLTBLLEN 200 +#define HEBTBL1LEN 27 +#define HEBTBL2LEN 35 +#define AR1TBLLEN 158 +#define AR2TBLLEN 179 + +#define COLLS 32 // first collating number (space/end of line) +#define COLS1 (COLLS+9) // quotes +#define COLS2 (COLS1+5) // parens +#define COLS3 (COLS2+6) // money +#define COLS4 (COLS3+6) // math ops +#define COLS5 (COLS4+8) // math others +#define COLS6 (COLS5+14) // others: %#&@\_|~ +#define COLS7 (COLS6+13) // greek +#define COLS8 (COLS7+25) // numbers +#define COLS9 (COLS8+10) // alphabet +#define COLS10 (COLS9+60) // cyrillic +#define COLS10h (COLS9+42) // hebrew - writes over european and cyrillic +#define COLS10a (COLS10h+28) // arabic - inclusive from 198(C6)- 252(FC) +#define COLS11 253 // End of list - arabic goes to the end +#define COLS0_ARABIC COLS11 // Set if arabic accent marking +#define COLS0_HEBREW COLS11 // Set if hebrew accent marking +#define COLSOEM 254 // OEM character in upper range - non-collatable +#define COLS0_UNICODE 254 // Use this for UNICODE +#define COLS0 255 // graphics/misc - chars without a 
collate value + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE flmAsc60Tbl[ ASCTBLLEN + 2] = +{ + 0x20, // Initial character offset + ASCTBLLEN, // Length of this table + COLLS, // + COLLS+5, // ! + COLS1, // " + COLS6+1, // # + COLS3, // $ + COLS6, // % + COLS6+2, // & + COLS1+1, // ' + COLS2, // ( + COLS2+1, // ) + COLS4+2, // * + COLS4, // + + COLLS+2, // , + COLS4+1, // - + COLLS+1, // . + COLS4+3, // / + COLS8, // 0 + COLS8+1, // 1 + COLS8+2, // 2 + COLS8+3, // 3 + COLS8+4, // 4 + COLS8+5, // 5 + COLS8+6, // 6 + COLS8+7, // 7 + COLS8+8, // 8 + COLS8+9, // 9 + COLLS+3, // : + COLLS+4, // ; + COLS5, // < + COLS5+2, // = + COLS5+4, // > + COLLS+7, // ? + COLS6+3, // @ + COLS9, // A + COLS9+2, // B + COLS9+3, // C + COLS9+6, // D + COLS9+7, // E + COLS9+8, // F + COLS9+9, // G + COLS9+10, // H + COLS9+12, // I + COLS9+14, // J + COLS9+15, // K + COLS9+16, // L + COLS9+18, // M + COLS9+19, // N + COLS9+21, // O + COLS9+23, // P + COLS9+24, // Q + COLS9+25, // R + COLS9+27, // S + COLS9+29, // T + COLS9+30, // U + COLS9+31, // V + COLS9+32, // W + COLS9+33, // X + COLS9+34, // Y + COLS9+35, // Z + COLS9+40, // [ + COLS6+4, // backslash + COLS9+41, // ] + COLS4+4, // ^ + COLS6+5, // _ + COLS1+2, // ` + COLS9, // a + COLS9+2, // b + COLS9+3, // c + COLS9+6, // d + COLS9+7, // e + COLS9+8, // f + COLS9+9, // g + COLS9+10, // h + COLS9+12, // i + COLS9+14, // j + COLS9+15, // k + COLS9+16, // l + COLS9+18, // m + COLS9+19, // n + COLS9+21, // o + COLS9+23, // p + COLS9+24, // q + COLS9+25, // r + COLS9+27, // s + COLS9+29, // t + COLS9+30, // u + COLS9+31, // v + COLS9+32, // w + COLS9+33, // x + COLS9+34, // y + COLS9+35, // z + COLS2+4, // { + COLS6+6, // | + COLS2+5, // } + COLS6+7 // ~ +}; + +/**************************************************************************** +Desc: Multinational table 
+****************************************************************************/ +FLMBYTE flmMn60Tbl[ MNTBLLEN + 2] = +{ + 23, // Initial character offset + MNTBLLEN, // Length of this table + COLS9+27, // German Double s + COLS9+15, // Icelandic k + COLS9+14, // Dotless j + + // IBM Charset + + COLS9, // A Acute + COLS9, // a Acute + COLS9, // A Circumflex + COLS9, // a Circumflex + COLS9, // A Diaeresis or Umlaut + COLS9, // a Diaeresis or Umlaut + COLS9, // A Grave + COLS9, // a Grave + COLS9, // A Ring + COLS9, // a Ring + COLS9+1, // AE digraph + COLS9+1, // ae digraph + COLS9+3, // C Cedilla + COLS9+3, // c Cedilla + COLS9+7, // E Acute + COLS9+7, // e Acute + COLS9+7, // E Circumflex + COLS9+7, // e Circumflex + COLS9+7, // E Diaeresis or Umlaut + COLS9+7, // e Diaeresis or Umlaut + COLS9+7, // E Grave + COLS9+7, // e Grave + COLS9+12, // I Acute + COLS9+12, // i Acute + COLS9+12, // I Circumflex + COLS9+12, // i Circumflex + COLS9+12, // I Diaeresis or Umlaut + COLS9+12, // i Diaeresis or Umlaut + COLS9+12, // I Grave + COLS9+12, // i Grave + COLS9+20, // N Tilde + COLS9+20, // n Tilde + COLS9+21, // O Acute + COLS9+21, // o Acute + COLS9+21, // O Circumflex + COLS9+21, // o Circumflex + COLS9+21, // O Diaeresis or Umlaut + COLS9+21, // o Diaeresis or Umlaut + COLS9+21, // O Grave + COLS9+21, // o Grave + COLS9+30, // U Acute + COLS9+30, // u Acute + COLS9+30, // U Circumflex + COLS9+30, // u Circumflex + COLS9+30, // U Diaeresis or Umlaut + COLS9+30, // u Diaeresis or Umlaut + COLS9+30, // U Grave + COLS9+30, // u Grave + COLS9+34, // Y Diaeresis or Umlaut + COLS9+34, // y Diaeresis or Umlaut + + // IBM foreign + + COLS9, // A Tilde + COLS9, // a Tilde + COLS9+6, // D Cross Bar + COLS9+6, // d Cross Bar + COLS9+21, // O Slash + COLS9+21, // o Slash + COLS9+21, // O Tilde + COLS9+21, // o Tilde + COLS9+34, // Y Acute + COLS9+34, // y Acute + COLS9+6, // Uppercase Eth + COLS9+6, // Lowercase Eth + COLS9+37, // Uppercase Thorn + COLS9+37, // Lowercase Thorn + + 
// Teletex chars + + COLS9, // A Breve + COLS9, // a Breve + COLS9, // A Macron + COLS9, // a Macron + COLS9, // A Ogonek + COLS9, // a Ogonek + COLS9+3, // C Acute + COLS9+3, // c Acute + COLS9+3, // C Caron or Hachek + COLS9+3, // c Caron or Hachek + COLS9+3, // C Circumflex + COLS9+3, // c Circumflex + COLS9+3, // C Dot Above + COLS9+3, // c Dot Above + COLS9+6, // D Caron or Hachek (Apostrophe Beside) + COLS9+6, // d Caron or Hachek (Apostrophe Beside) + COLS9+7, // E Caron or Hachek + COLS9+7, // e Caron or Hachek + COLS9+7, // E Dot Above + COLS9+7, // e Dot Above + COLS9+7, // E Macron + COLS9+7, // e Macron + COLS9+7, // E Ogonek + COLS9+7, // e Ogonek + COLS9+9, // G Acute + COLS9+9, // g Acute + COLS9+9, // G Breve + COLS9+9, // g Breve + COLS9+9, // G Caron or Hachek + COLS9+9, // g Caron or Hachek + COLS9+9, // G Cedilla (Apostrophe Under) + COLS9+9, // g Cedilla (Apostrophe Over) + COLS9+9, // G Circumflex + COLS9+9, // g Circumflex + COLS9+9, // G Dot Above + COLS9+9, // g Dot Above + COLS9+10, // H Circumflex + COLS9+10, // h Circumflex + COLS9+10, // H Cross Bar + COLS9+10, // h Cross Bar + COLS9+12, // I Dot Above (Sharp Accent) + COLS9+12, // i Dot Above (Sharp Accent) + COLS9+12, // I Macron + COLS9+12, // i Macron + COLS9+12, // I Ogonek + COLS9+12, // i Ogonek + COLS9+12, // I Tilde + COLS9+12, // i Tilde + COLS9+13, // IJ Digraph + COLS9+13, // ij Digraph + COLS9+14, // J Circumflex + COLS9+14, // j Circumflex + COLS9+15, // K Cedilla (Apostrophe Under) + COLS9+15, // k Cedilla (Apostrophe Under) + COLS9+16, // L Acute + COLS9+16, // l Acute + COLS9+16, // L Caron or Hachek (Apostrophe Beside) + COLS9+16, // l Caron or Hachek (Apostrophe Beside) + COLS9+16, // L Cedilla (Apostrophe Under) + COLS9+16, // l Cedilla (Apostrophe Under) + COLS9+16, // L Center Dot + COLS9+16, // l Center Dot + COLS9+16, // L Stroke + COLS9+16, // l Stroke + COLS9+19, // N Acute + COLS9+19, // n Acute + COLS9+19, // N Apostrophe + COLS9+19, // n Apostrophe + 
COLS9+19, // N Caron or Hachek + COLS9+19, // n Caron or Hachek + COLS9+19, // N Cedilla (Apostrophe Under) + COLS9+19, // n Cedilla (Apostrophe Under) + COLS9+21, // O Double Acute + COLS9+21, // o Double Acute + COLS9+21, // O Macron + COLS9+21, // o Macron + COLS9+22, // OE digraph + COLS9+22, // oe digraph + COLS9+25, // R Acute + COLS9+25, // r Acute + COLS9+25, // R Caron or Hachek + COLS9+25, // r Caron or Hachek + COLS9+25, // R Cedilla (Apostrophe Under) + COLS9+25, // r Cedilla (Apostrophe Under) + COLS9+27, // S Acute + COLS9+27, // s Acute + COLS9+27, // S Caron or Hachek + COLS9+27, // s Caron or Hachek + COLS9+27, // S Cedilla + COLS9+27, // s Cedilla + COLS9+27, // S Circumflex + COLS9+27, // s Circumflex + COLS9+29, // T Caron or Hachek (Apostrophe Beside) + COLS9+29, // t Caron or Hachek (Apostrophe Beside) + COLS9+29, // T Cedilla (Apostrophe Under) + COLS9+29, // t Cedilla (Apostrophe Under) + COLS9+29, // T Cross Bar + COLS9+29, // t Cross Bar + COLS9+30, // U Breve + COLS9+30, // u Breve + COLS9+30, // U Double Acute + COLS9+30, // u Double Acute + COLS9+30, // U Macron + COLS9+30, // u Macron + COLS9+30, // U Ogonek + COLS9+30, // u Ogonek + COLS9+30, // U Ring + COLS9+30, // u Ring + COLS9+30, // U Tilde + COLS9+30, // u Tilde + COLS9+32, // W Circumflex + COLS9+32, // w Circumflex + COLS9+34, // Y Circumflex + COLS9+34, // y Circumflex + COLS9+35, // Z Acute + COLS9+35, // z Acute + COLS9+35, // Z Caron or Hachek + COLS9+35, // z Caron or Hachek + COLS9+35, // Z Dot Above + COLS9+35, // z Dot Above + COLS9+19, // Uppercase Eng + COLS9+19, // Lowercase Eng + + // other + + COLS9+6, // D Macron + COLS9+6, // d Macron + COLS9+16, // L Macron + COLS9+16, // l Macron + COLS9+19, // N Macron + COLS9+19, // n Macron + COLS9+25, // R Grave + COLS9+25, // r Grave + COLS9+27, // S Macron + COLS9+27, // s Macron + COLS9+29, // T Macron + COLS9+29, // t Macron + COLS9+34, // Y Breve + COLS9+34, // y Breve + COLS9+34, // Y Grave + COLS9+34, // y Grave + 
COLS9+6, // D Apostrophe Beside + COLS9+6, // d Apostrophe Beside + COLS9+21, // O Apostrophe Beside + COLS9+21, // o Apostrophe Beside + COLS9+30, // U Apostrophe Beside + COLS9+30, // u Apostrophe Beside + COLS9+7, // E breve + COLS9+7, // e breve + COLS9+12, // I breve + COLS9+12, // i breve + COLS9+12, // dotless I + COLS9+12, // dotless i + COLS9+21, // O breve + COLS9+21 // o breve +}; + +/**************************************************************************** +Desc: Symbol collation table - currency symbols map into the COLS3 (money) range +****************************************************************************/ +FLMBYTE flmSym60Tbl[ SYMTBLLEN + 2] = +{ + 11, // Initial character offset + SYMTBLLEN, // Length of this table + COLS3+2, // pound + COLS3+3, // yen + COLS3+4, // pesetas + COLS3+5, // florin + COLS0, + COLS0, + COLS0, + COLS0, + COLS3+1, // cent +}; + +/**************************************************************************** +Desc: Greek collation table +****************************************************************************/ +FLMBYTE flmGrk60Tbl[ GRKTBLLEN + 2] = +{ + 0, // Starting offset + GRKTBLLEN, // Length + COLS7, // Uppercase Alpha + COLS7, // Lowercase Alpha + COLS7+1, // Uppercase Beta + COLS7+1, // Lowercase Beta + COLS7+1, // Uppercase Beta Medial + COLS7+1, // Lowercase Beta Medial + COLS7+2, // Uppercase Gamma + COLS7+2, // Lowercase Gamma + COLS7+3, // Uppercase Delta + COLS7+3, // Lowercase Delta + COLS7+4, // Uppercase Epsilon + COLS7+4, // Lowercase Epsilon + COLS7+5, // Uppercase Zeta + COLS7+5, // Lowercase Zeta + COLS7+6, // Uppercase Eta + COLS7+6, // Lowercase Eta + COLS7+7, // Uppercase Theta + COLS7+7, // Lowercase Theta + COLS7+8, // Uppercase Iota + COLS7+8, // Lowercase Iota + COLS7+9, // Uppercase Kappa + COLS7+9, // Lowercase Kappa + COLS7+10, // Uppercase Lambda + COLS7+10, // Lowercase Lambda + COLS7+11, // Uppercase Mu + COLS7+11, // Lowercase Mu + COLS7+12, // Uppercase Nu + COLS7+12, // Lowercase Nu + COLS7+13, // Uppercase Xi + COLS7+13, // Lowercase Xi + COLS7+14, // Uppercase Omicron + 
COLS7+14, // Lowercase Omicron + COLS7+15, // Uppercase Pi + COLS7+15, // Lowercase Pi + COLS7+16, // Uppercase Rho + COLS7+16, // Lowercase Rho + COLS7+17, // Uppercase Sigma + COLS7+17, // Lowercase Sigma + COLS7+17, // Uppercase Sigma Terminal + COLS7+17, // Lowercase Sigma Terminal + COLS7+18, // Uppercase Tau + COLS7+18, // Lowercase Tau + COLS7+19, // Uppercase Upsilon + COLS7+19, // Lowercase Upsilon + COLS7+20, // Uppercase Phi + COLS7+20, // Lowercase Phi + COLS7+21, // Uppercase Chi + COLS7+21, // Lowercase Chi + COLS7+22, // Uppercase Psi + COLS7+22, // Lowercase Psi + COLS7+23, // Uppercase Omega + COLS7+23, // Lowercase Omega + + // Other Modern Greek Characters [8,52] + + COLS7, // Uppercase ALPHA Tonos - high prime + COLS7, // Lowercase Alpha Tonos - acute + COLS7+4, // Uppercase EPSILON Tonos - high prime + COLS7+4, // Lowercase Epsilon Tonos - acute + COLS7+6, // Uppercase ETA Tonos - high prime + COLS7+6, // Lowercase Eta Tonos - acute + COLS7+8, // Uppercase IOTA Tonos - high prime + COLS7+8, // Lowercase iota Tonos - acute + COLS7+8, // Uppercase IOTA Diaeresis + COLS7+8, // Lowercase iota diaeresis + COLS7+14, // Uppercase OMICRON Tonos - high prime + COLS7+14, // Lowercase Omicron Tonos - acute + COLS7+19, // Uppercase UPSILON Tonos - high prime + COLS7+19, // Lowercase Upsilon Tonos - acute + COLS7+19, // Uppercase UPSILON Diaeresis + COLS7+19, // Lowercase Upsilon diaeresis + COLS7+23, // Uppercase OMEGA Tonos - high prime + COLS7+23, // Lowercase Omega Tonos - acute + + // Variants [8,70] + + COLS7+4, // epsilon (variant) + COLS7+7, // theta (variant) + COLS7+9, // kappa (variant) + COLS7+15, // pi (variant) + COLS7+16, // rho (variant) + COLS7+17, // sigma (variant) + COLS7+19, // upsilon (variant) + COLS7+20, // phi (variant) + COLS7+23, // omega (variant) + + // Greek Diacritic marks [8,79] + + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + 
COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, + COLS0, // 8,108 end of diacritic marks + + // Ancient Greek [8,109] + + COLS7, // alpha grave + COLS7, // alpha circumflex + COLS7, // alpha w/iota + COLS7, // alpha acute w/iota + COLS7, // alpha grave w/iota + COLS7, // alpha circumflex w/Iota + COLS7, // alpha smooth + COLS7, // alpha smooth acute + COLS7, // alpha smooth grave + COLS7, // alpha smooth circumflex + COLS7, // alpha smooth w/Iota + COLS7, // alpha smooth acute w/Iota + COLS7, // alpha smooth grave w/Iota + COLS7, // alpha smooth circumflex w/Iota + COLS7, // alpha rough + COLS7, // alpha rough acute + COLS7, // alpha rough grave + COLS7, // alpha rough circumflex + COLS7, // alpha rough w/Iota + COLS7, // alpha rough acute w/Iota + COLS7, // alpha rough grave w/Iota + COLS7, // alpha rough circumflex w/Iota + COLS7+4, // epsilon grave + COLS7+4, // epsilon smooth + COLS7+4, // epsilon smooth acute + COLS7+4, // epsilon smooth grave + COLS7+4, // epsilon rough + COLS7+4, // epsilon rough acute + COLS7+4, // epsilon rough grave + COLS7+6, // eta grave + COLS7+6, // eta circumflex + COLS7+6, // eta w/iota + COLS7+6, // eta acute w/iota + COLS7+6, // eta grave w/Iota + COLS7+6, // eta circumflex w/Iota + COLS7+6, // eta smooth + COLS7+6, // eta smooth acute + COLS7+6, // eta smooth grave + COLS7+6, // eta smooth circumflex + COLS7+6, // eta smooth w/Iota + COLS7+6, // eta smooth acute w/Iota + COLS7+6, // eta smooth grave w/Iota + COLS7+6, // eta smooth circumflex w/Iota + COLS7+6, // eta rough + COLS7+6, // eta rough acute + COLS7+6, // eta rough grave + COLS7+6, // eta rough circumflex + COLS7+6, // eta rough w/Iota + COLS7+6, // eta rough acute w/Iota + COLS7+6, // eta rough grave w/Iota + COLS7+6, // eta rough circumflex w/Iota + COLS7+8, // iota grave + COLS7+8, // iota circumflex + COLS7+8, // iota acute diaeresis + COLS7+8, // iota grave diaeresis + COLS7+8, // iota smooth + 
COLS7+8, // iota smooth acute + COLS7+8, // iota smooth grave + COLS7+8, // iota smooth circumflex + COLS7+8, // iota rough + COLS7+8, // iota rough acute + COLS7+8, // iota rough grave + COLS7+8, // iota rough circumflex + COLS7+14, // omicron grave + COLS7+14, // omicron smooth + COLS7+14, // omicron smooth acute + COLS7+14, // omicron smooth grave + COLS7+14, // omicron rough + COLS7+14, // omicron rough acute + COLS7+14, // omicron rough grave + COLS7+16, // rho smooth + COLS7+16, // rho rough + COLS7+19, // upsilon grave + COLS7+19, // upsilon circumflex + COLS7+19, // upsilon acute diaeresis + COLS7+19, // upsilon grave diaeresis + COLS7+19, // upsilon smooth + COLS7+19, // upsilon smooth acute + COLS7+19, // upsilon smooth grave + COLS7+19, // upsilon smooth circumflex + COLS7+19, // upsilon rough + COLS7+19, // upsilon rough acute + COLS7+19, // upsilon rough grave + COLS7+19, // upsilon rough circumflex + COLS7+23, // omega grave + COLS7+23, // omega circumflex + COLS7+23, // omega w/Iota + COLS7+23, // omega acute w/Iota + COLS7+23, // omega grave w/Iota + COLS7+23, // omega circumflex w/Iota + COLS7+23, // omega smooth + COLS7+23, // omega smooth acute + COLS7+23, // omega smooth grave + COLS7+23, // omega smooth circumflex + COLS7+23, // omega smooth w/Iota + COLS7+23, // omega smooth acute w/Iota + COLS7+23, // omega smooth grave w/Iota + COLS7+23, // omega smooth circumflex w/Iota + COLS7+23, // omega rough + COLS7+23, // omega rough acute + COLS7+23, // omega rough grave + COLS7+23, // omega rough circumflex + COLS7+23, // omega rough w/Iota + COLS7+23, // omega rough acute w/Iota + COLS7+23, // omega rough grave w/Iota + COLS7+23, // omega rough circumflex w/Iota + COLS7+24, // Uppercase Stigma--the number 6 + COLS7+24, // Uppercase Digamma--Obsolete letter used as 6 + COLS7+24, // Uppercase Koppa--Obsolete letter used as 90 + COLS7+24 // Uppercase Sampi--Obsolete letter used as 900 +}; + 
+/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE flmCyrl60Tbl[ CYRLTBLLEN + 2] = +{ + 0, // Starting offset + CYRLTBLLEN, // Length of table + COLS10, // Russian uppercase A + COLS10, // Russian lowercase A + COLS10+1, // Russian uppercase BE + COLS10+1, // Russian lowercase BE + COLS10+2, // Russian uppercase VE + COLS10+2, // Russian lowercase VE + COLS10+3, // Russian uppercase GHE + COLS10+3, // Russian lowercase GHE + COLS10+5, // Russian uppercase DE + COLS10+5, // Russian lowercase DE + COLS10+8, // Russian uppercase E + COLS10+8, // Russian lowercase E + COLS10+9, // Russian lowercase YO + COLS10+9, // Russian lowercase YO + COLS10+11, // Russian uppercase ZHE + COLS10+11, // Russian lowercase ZHE + COLS10+12, // Russian uppercase ZE + COLS10+12, // Russian lowercase ZE + COLS10+14, // Russian uppercase I + COLS10+14, // Russian lowercase I + COLS10+17, // Russian uppercase SHORT I + COLS10+17, // Russian lowercase SHORT I + COLS10+19, // Russian uppercase KA + COLS10+19, // Russian lowercase KA + COLS10+20, // Russian uppercase EL + COLS10+20, // Russian lowercase EL + COLS10+22, // Russian uppercase EM + COLS10+22, // Russian lowercase EM + COLS10+23, // Russian uppercase EN + COLS10+23, // Russian lowercase EN + COLS10+25, // Russian uppercase O + COLS10+25, // Russian lowercase O + COLS10+26, // Russian uppercase PE + COLS10+26, // Russian lowercase PE + COLS10+27, // Russian uppercase ER + COLS10+27, // Russian lowercase ER + COLS10+28, // Russian uppercase ES + COLS10+28, // Russian lowercase ES + COLS10+29, // Russian uppercase TE + COLS10+29, // Russian lowercase TE + COLS10+32, // Russian uppercase U + COLS10+32, // Russian lowercase U + COLS10+34, // Russian uppercase EF + COLS10+34, // Russian lowercase EF + COLS10+35, // Russian uppercase HA + COLS10+35, // Russian lowercase HA + COLS10+36, // Russian uppercase TSE + 
COLS10+36, // Russian lowercase TSE + COLS10+37, // Russian uppercase CHE + COLS10+37, // Russian lowercase CHE + COLS10+39, // Russian uppercase SHA + COLS10+39, // Russian lowercase SHA + COLS10+40, // Russian uppercase SHCHA + COLS10+40, // Russian lowercase SHCHA + COLS10+41, // Russian uppercase ER (also hard sign) + COLS10+41, // Russian lowercase ER (also hard sign) + COLS10+42, // Russian uppercase ERY + COLS10+42, // Russian lowercase ERY + COLS10+43, // Russian uppercase SOFT SIGN + COLS10+43, // Russian lowercase SOFT SIGN + COLS10+45, // Russian uppercase REVERSE E + COLS10+45, // Russian lowercase REVERSE E + COLS10+46, // Russian uppercase YU + COLS10+46, // Russian lowercase yu + COLS10+47, // Russian uppercase YA + COLS10+47, // Russian lowercase ya + COLS0, // Russian uppercase EH + COLS0, // Russian lowercase eh + COLS10+7, // Macedonian uppercase SOFT DJ + COLS10+7, // Macedonian lowercase soft dj + COLS10+4, // Ukrainian uppercase HARD G + COLS10+4, // Ukrainian lowercase hard g + COLS0, // GE bar + COLS0, // ge bar + COLS10+6, // Serbian uppercase SOFT DJ + COLS10+6, // Serbian lowercase SOFT DJ + COLS0, // IE (variant) + COLS0, // ie (variant) + COLS10+10, // Ukrainian uppercase YE + COLS10+10, // Ukrainian lowercase YE + COLS0, // ZHE with right descender + COLS0, // zhe with right descender + COLS10+13, // Macedonian uppercase ZELO + COLS10+13, // Macedonian lowercase ZELO + COLS0, // Old Slavonic uppercase Z + COLS0, // Old Slavonic lowercase z + COLS0, // II with macron + COLS0, // ii with macron + COLS10+15, // Ukrainian uppercase I + COLS10+15, // Ukrainian lowercase I + COLS10+16, // Ukrainian uppercase I with Two Dots + COLS10+16, // Ukrainian lowercase I with Two Dots + COLS0, // Old Slavonic uppercase I ligature + COLS0, // Old Slavonic lowercase I ligature + COLS10+18, // Serbian--Macedonian uppercase JE + COLS10+18, // Serbian--Macedonian lowercase JE + COLS10+31, // Macedonian uppercase SOFT K + COLS10+31, // Macedonian lowercase 
SOFT K + COLS0, // KA with right descender + COLS0, // ka with right descender + COLS0, // KA ogonek + COLS0, // ka ogonek + COLS0, // KA vertical bar + COLS0, // ka vertical bar + COLS10+21, // Serbian--Macedonian uppercase SOFT L + COLS10+21, // Serbian--Macedonian lowercase SOFT L + COLS0, // EN with right descender + COLS0, // en with right descender + COLS10+24, // Serbian--Macedonian uppercase SOFT N + COLS10+24, // Serbian--Macedonian lowercase SOFT N + COLS0, // ROUND OMEGA + COLS0, // round omega + COLS0, // OMEGA + COLS0, // omega + COLS10+30, // Serbian uppercase SOFT T + COLS10+30, // Serbian lowercase SOFT T + COLS10+33, // Byelorussian uppercase SHORT U + COLS10+33, // Byelorussian lowercase SHORT U + COLS0, // U with macron + COLS0, // u with macron + COLS0, // STRAIGHT U + COLS0, // straight u + COLS0, // STRAIGHT U bar + COLS0, // straight u bar + COLS0, // OU ligature + COLS0, // ou ligature + COLS0, // KHA with right descender + COLS0, // kha with right descender + COLS0, // KHA ogonek + COLS0, // kha ogonek + COLS0, // H + COLS0, // h + COLS0, // OMEGA titlo + COLS0, // omega titlo + COLS10+38, // Serbian uppercase HARD DJ + COLS10+38, // Serbian lowercase HARD DJ + COLS0, // CHE with right descender + COLS0, // che with right descender + COLS0, // CHE vertical bar + COLS0, // che vertical bar + COLS0, // Old Slavonic SHCHA (variant) + COLS0, // old SLAVONIC shcha (variant) + COLS10+44, // Old Russian uppercase YAT + COLS10+44, // Old Russian lowercase YAT + + // END OF UNIQUE COLLATED BYTES + // CHARACTERS BELOW MUST HAVE HAVE THEIR OWN + // SUB-COLLATION VALUE TO COMPARE CORRECTLY. 
+ + COLS0, // Old Bulgarian uppercase YUS + COLS0, // Old Bulgarian lowercase YUS + COLS0, // Old Slovanic uppercase YUS MALYI + COLS0, // Old Slovanic uppercase YUS MALYI + COLS0, // KSI + COLS0, // ksi + COLS0, // PSI + COLS0, // psi + COLS0, // Old Russian uppercase FITA + COLS0, // Old Russian lowercase FITA + COLS0, // Old Russian uppercase IZHITSA + COLS0, // Old Russian lowercase IZHITSA + COLS0, // Russian uppercase A acute + COLS0, // Russian lowercase A acute + COLS10+8, // Russian uppercase E acute + COLS10+8, // Russian lowercase E acute + COLS0, // E acute + COLS0, // e acute + COLS10+14, // II acute + COLS10+14, // ii acute + COLS0, // I acute + COLS0, // i acute + COLS0, // YI acute + COLS0, // yi acute + COLS10+25, // O acute + COLS10+25, // o acute + COLS10+32, // U acute + COLS10+32, // u acute + COLS10+42, // YERI acute + COLS10+42, // YERI acute + COLS10+45, // REVERSED E acute + COLS10+45, // reversed e acute + COLS10+46, // YU acute + COLS10+46, // yu acute + COLS10+47, // YA acute + COLS10+47, // ya acute + COLS10, // A grave + COLS10, // a grave + COLS10+8, // E grave + COLS10+8, // e grave + COLS10+9, // YO grave + COLS10+9, // yo grave + COLS10+14, // I grave + COLS10+14, // i grave + COLS10+25, // O grave + COLS10+25, // o grave + COLS10+32, // U grave + COLS10+32, // u grave + COLS10+42, // YERI grave + COLS10+42, // yeri grave + COLS10+45, // REVERSED E grave + COLS10+45, // reversed e grave + COLS10+46, // IU (YU) grave + COLS10+46, // iu (yu) grave + COLS10+47, // ia (YA) grave + COLS10+47, // ia (ya) grave ******* [10,199] +}; + +/**************************************************************************** +Desc: The Hebrew characters are collated over the Russian characters. + Therefore sorting both Hebrew and Russian is impossible to do. 
+****************************************************************************/ +FLMBYTE flmHeb60TblA[ HEBTBL1LEN + 2] = +{ + 0, // Starting offset + HEBTBL1LEN, // Length of table + COLS10h+0, // Alef + COLS10h+1, // Bet + COLS10h+2, // Gimel + COLS10h+3, // Dalet + COLS10h+4, // He + COLS10h+5, // Vav + COLS10h+6, // Zayin + COLS10h+7, // Het + COLS10h+8, // Tet + COLS10h+9, // Yod + COLS10h+10, // Kaf (final) [9,10] + COLS10h+11, // Kaf + COLS10h+12, // Lamed + COLS10h+13, // Mem (final) + COLS10h+14, // Mem + COLS10h+15, // Nun (final) + COLS10h+16, // Nun + COLS10h+17, // Samekh + COLS10h+18, // Ayin + COLS10h+19, // Pe (final) + COLS10h+20, // Pe [9,20] + COLS10h+21, // Tsadi (final) + COLS10h+22, // Tsadi + COLS10h+23, // Qof + COLS10h+24, // Resh + COLS10h+25, // Shin + COLS10h+26 // Tav [9,26] +}; + +/**************************************************************************** +Desc: This is the ANCIENT HEBREW SCRIPT piece. The actual value will be + stored in the subcollation. This way we don't play + diacritic/subcollation games. 
+****************************************************************************/ +FLMBYTE flmHeb60TblB[ HEBTBL2LEN + 2] = +{ + 84, // Starting offset + HEBTBL2LEN, // Length of table + COLS10h+0, // Alef Dagesh [9,84] + COLS10h+1, // Bet Dagesh + COLS10h+1, // Vez - looks like a bet + COLS10h+2, // Gimel Dagesh + COLS10h+3, // Dalet Dagesh + COLS10h+4, // He Dagesh + COLS10h+5, // Vav Dagesh [9,90] + COLS10h+5, // Vav Holem + COLS10h+6, // Zayin Dagesh + COLS10h+7, // Het Dagesh + COLS10h+8, // Tet Dagesh + COLS10h+9, // Yod Dagesh + COLS10h+9, // Yod Hiriq [9,96] - not on my list + COLS10h+11, // Kaf Dagesh + COLS10h+10, // Kaf Dagesh (final) + COLS10h+10, // Kaf Sheva (final) + COLS10h+10, // Kaf Tsere (final) [9,100] + COLS10h+10, // Kaf Segol (final) + COLS10h+10, // Kaf Patah (final) + COLS10h+10, // Kaf Qamats (final) + COLS10h+10, // Kaf Dagesh Qamats (final) + COLS10h+12, // Lamed Dagesh + COLS10h+14, // Mem Dagesh + COLS10h+16, // Nun Dagesh + COLS10h+15, // Nun Qamats (final) + COLS10h+17, // Samekh Dagesh + COLS10h+20, // Pe Dagesh [9,110] + COLS10h+20, // Fe - just guessing this is like Pe - was +21 + COLS10h+22, // Tsadi Dagesh + COLS10h+23, // Qof Dagesh + COLS10h+25, // Sin (with sin dot) + COLS10h+25, // Sin Dagesh (with sin dot) + COLS10h+25, // Shin + COLS10h+25, // Shin Dagesh + COLS10h+26 // Tav Dagesh [9,118] +}; + +/**************************************************************************** +Desc: The Arabic characters are collated OVER the Russian characters + Therefore sorting both Arabic and Russian in the same database + is not supported. + + Arabic starts with a bunch of accents/diacritic marks that are + actually placed OVER a preceding character. These accents are + ignored while sorting the first pass - when collation == COLS0. 
+ + There are 4 possible states for all/most arabic characters: + ÚÙ - occurs as the only character in a word + ÄÙ - appears at the first of the word + ÄÄ - appears at the middle of a word + ÚÄ - appears at the end of the word + + Usually only the simple version of the letter is stored. + Therefore we should not have to worry about sub-collation + of these characters. + + The arabic characters with diacritics differ however. The alef has + sub-collation values to sort correctly. There is not any more room + to add more collation values. Some chars in CS14 are combined when + urdu, pashto and sindhi characters overlap. +****************************************************************************/ +FLMBYTE flmAr160Tbl[ AR1TBLLEN + 2] = +{ + 38, // Starting offset + AR1TBLLEN, // Length of table + COLLS+2, // , comma + COLLS+3, // : colon + COLLS+7, // ? question mark + COLS4+2, // * asterick + COLS6, // % percent + COLS9+41, // >> alphabetic - end of list) + COLS9+40, // << alphabetic - end of list) + COLS2, // ( + COLS2+1, // ) + COLS8+1, // ÚÙ One + COLS8+2, // ÚÙ Two + COLS8+3, // ÚÙ Three + COLS8+4, // ÚÙ Four + COLS8+5, // ÚÙ Five + COLS8+6, // ÚÙ Six + COLS8+7, // ÚÙ Seven + COLS8+8, // ÚÙ Eight + COLS8+9, // ÚÙ Nine + COLS8+0, // ÚÙ Zero + COLS8+2, // ÚÙ Two (Handwritten) + COLS10a+1, // ÚÙ alif + COLS10a+1, // ÚÄ alif + COLS10a+2, // ÚÙ ba + COLS10a+2, // ÄÙ ba + COLS10a+2, // ÄÄ ba + COLS10a+2, // ÚÄ ba + COLS10a+6, // ÚÙ ta + COLS10a+6, // ÄÙ ta + COLS10a+6, // ÄÄ ta + COLS10a+6, // ÚÄ ta + COLS10a+8, // ÚÙ tha + COLS10a+8, // ÄÙ tha + COLS10a+8, // ÄÄ tha + COLS10a+8, // ÚÄ tha + COLS10a+12, // ÚÙ jiim + COLS10a+12, // ÄÙ jiim + COLS10a+12, // ÄÄ jiim + COLS10a+12, // ÚÄ jiim + COLS10a+16, // ÚÙ Ha + COLS10a+16, // ÄÙ Ha + COLS10a+16, // ÄÄ Ha + COLS10a+16, // ÚÄ Ha + COLS10a+17, // ÚÙ kha + COLS10a+17, // ÄÙ kha + COLS10a+17, // ÄÄ kha + COLS10a+17, // ÚÄ kha + COLS10a+20, // ÚÙ dal + COLS10a+20, // ÚÄ dal + COLS10a+22, // ÚÙ dhal + COLS10a+22, // ÚÄ dhal 
+ COLS10a+27, // ÚÙ ra + COLS10a+27, // ÚÄ ra + COLS10a+29, // ÚÙ ziin + COLS10a+29, // ÚÄ ziin + COLS10a+31, // ÚÙ siin + COLS10a+31, // ÄÙ siin + COLS10a+31, // ÄÄ siin + COLS10a+31, // ÚÄ siin + COLS10a+32, // ÚÙ shiin + COLS10a+32, // ÄÙ shiin + COLS10a+32, // ÄÄ shiin + COLS10a+32, // ÚÄ shiin + COLS10a+34, // ÚÙ Sad + COLS10a+34, // ÄÙ Sad + COLS10a+34, // ÄÄ Sad + COLS10a+34, // ÚÄ Sad + COLS10a+35, // ÚÙ Dad + COLS10a+35, // ÄÙ Dad + COLS10a+35, // ÄÄ Dad + COLS10a+35, // ÚÄ Dad + COLS10a+36, // ÚÙ Ta + COLS10a+36, // ÄÙ Ta + COLS10a+36, // ÄÄ Ta + COLS10a+36, // ÚÄ Ta + COLS10a+37, // ÚÙ Za + COLS10a+37, // ÄÙ Za + COLS10a+37, // ÄÄ Za + COLS10a+37, // ÚÄ Za + COLS10a+38, // ÚÙ 'ain + COLS10a+38, // ÄÙ 'ain + COLS10a+38, // ÄÄ 'ain + COLS10a+38, // ÚÄ 'ain + COLS10a+39, // ÚÙ ghain + COLS10a+39, // ÄÙ ghain + COLS10a+39, // ÄÄ ghain + COLS10a+39, // ÚÄ ghain + COLS10a+40, // ÚÙ fa + COLS10a+40, // ÄÙ fa + COLS10a+40, // ÄÄ fa + COLS10a+40, // ÚÄ fa + COLS10a+42, // ÚÙ Qaf + COLS10a+42, // ÄÙ Qaf + COLS10a+42, // ÄÄ Qaf + COLS10a+42, // ÚÄ Qaf + COLS10a+43, // ÚÙ kaf + COLS10a+43, // ÄÙ kaf + COLS10a+43, // ÄÄ kaf + COLS10a+43, // ÚÄ kaf + COLS10a+46, // ÚÙ lam + COLS10a+46, // ÄÙ lam + COLS10a+46, // ÄÄ lam + COLS10a+46, // ÚÄ lam + COLS10a+47, // ÚÙ miim + COLS10a+47, // ÄÙ miim + COLS10a+47, // ÄÄ miim + COLS10a+47, // ÚÄ miim + COLS10a+48, // ÚÙ nuun + COLS10a+48, // ÄÙ nuun + COLS10a+48, // ÄÄ nuun + COLS10a+48, // ÚÄ nuun + COLS10a+49, // ÚÙ ha + COLS10a+49, // ÄÙ ha + COLS10a+49, // ÄÄ ha + COLS10a+49, // ÚÄ ha + COLS10a+6, // ÚÙ ta marbuuTah + COLS10a+6, // ÚÄ ta marbuuTah + COLS10a+50, // ÚÙ waw + COLS10a+50, // ÚÄ waw + COLS10a+53, // ÚÙ ya + COLS10a+53, // ÄÙ ya + COLS10a+53, // ÄÄ ya + COLS10a+53, // ÚÄ ya + COLS10a+52, // ÚÙ alif maqSuurah + COLS10a+52, // ÄÙ ya maqSuurah? + COLS10a+52, // ÄÄ ya maqSuurah? 
+ COLS10a+52, // ÚÄ alif maqSuurah + COLS10a+0, // ÚÙ hamzah accent - never appears alone + + // Store the sub-collation as the actual + // character value from this point on + + COLS10a+1, // ÚÙ alif hamzah + COLS10a+1, // ÚÄ alif hamzah + COLS10a+1, // ÚÙ hamzah-under-alif + COLS10a+1, // ÚÄ hamzah-under-alif + COLS10a+1, // ÚÙ waw hamzah + COLS10a+1, // ÚÄ waw hamzah + COLS10a+1, // ÚÙ ya hamzah + COLS10a+1, // ÄÙ ya hamzah + COLS10a+1, // ÄÄ ya hamzah + COLS10a+1, // ÚÄ ya hamzah + COLS10a+1, // ÚÙ alif fatHataan + COLS10a+1, // ÚÄ alif fatHataan + COLS10a+1, // ÚÙ alif maddah + COLS10a+1, // ÚÄ alif maddah + COLS10a+1, // ÚÙ alif waSlah + COLS10a+1, // ÚÄ alif waSlah (final) + + // LIGATURES + // Should NEVER be stored so will not worry + // about breaking up into pieces for collation. + // NOTE: + // Let's store the "Lam" collation value (+42) + // below and in the sub-collation store the + // actual character. This will sort real close. + // The best implementation is to + // break up ligatures into its base pieces. 
+ + COLS10a+46, // ÚÙ lamalif + COLS10a+46, // ÚÄ lamalif + COLS10a+46, // ÚÙ lamalif hamzah + COLS10a+46, // ÚÄ lamalif hamzah + COLS10a+46, // ÚÙ hamzah-under-lamalif + COLS10a+46, // ÚÄ hamzah-under-lamalif + COLS10a+46, // ÚÙ lamalif fatHataan + COLS10a+46, // ÚÄ lamalif fatHataan + COLS10a+46, // ÚÙ lamalif maddah + COLS10a+46, // ÚÄ lamalif maddah + COLS10a+46, // ÚÙ lamalif waSlah + COLS10a+46, // ÚÄ lamalif waSlah + COLS10a+46, // ÚÙ Allah - khaDalAlif + COLS0_ARABIC, // ÄÄ taTwiil + COLS0_ARABIC // ÄÄ taTwiil +}; +/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE flmAr260Tbl[ AR2TBLLEN + 2] = +{ + 41, // Starting offset + AR2TBLLEN, // Length of table + COLS8+4, // Farsi and Urdu Four + COLS8+4, // Urdu Four + COLS8+5, // Farsi and Urdu Five + COLS8+6, // Farsi Six + COLS8+6, // Farsi and Urdu Six + COLS8+7, // Urdu Seven + COLS8+8, // Urdu Eight + COLS10a+3, // Sindhi bb - baa /w 2 dots below (67b) + COLS10a+3, + COLS10a+3, + COLS10a+3, + COLS10a+4, // Sindhi bh - baa /w 4 dots below (680) + COLS10a+4, + COLS10a+4, + COLS10a+4, + COLS10a+5, // Malay, Kurdish, Pashto, Farsi, Sindhi, and Urdu p + COLS10a+5, // =peh - taa /w 3 dots below (67e) + COLS10a+5, + COLS10a+5, + COLS10a+7, // Urdu T - taa /w small tah + COLS10a+7, + COLS10a+7, + COLS10a+7, + COLS10a+7, // Pashto T - taa /w ring (forced to combine) + COLS10a+7, + COLS10a+7, + COLS10a+7, + COLS10a+9, // Sindhi th - taa /w 4 dots above (67f) + COLS10a+9, + COLS10a+9, + COLS10a+9, + COLS10a+10, // Sindhi Tr - taa /w 3 dots above (67d) + COLS10a+10, + COLS10a+10, + COLS10a+10, + COLS10a+11, // Sindhi Th - taa /w 2 dots above (67a) + COLS10a+11, + COLS10a+11, + COLS10a+11, + COLS10a+13, // Sindhi jj - haa /w 2 middle dots verticle (684) + COLS10a+13, + COLS10a+13, + COLS10a+13, + COLS10a+14, // Sindhi ny - haa /w 2 middle dots (683) + COLS10a+14, + COLS10a+14, + COLS10a+14, + 
COLS10a+15, // Malay, Kurdish, Pashto, Farsi, Sindhi, and Urdu ch + COLS10a+15, // =tcheh (686) + COLS10a+15, + COLS10a+15, + COLS10a+15, // Sindhi chh - haa /w middle 4 dots (687) + COLS10a+15, // forced to combine + COLS10a+15, + COLS10a+15, + COLS10a+18, // Pashto ts - haa /w 3 dots above (685) + COLS10a+18, + COLS10a+18, + COLS10a+18, + COLS10a+19, // Pashto dz - hamzah on haa (681) + COLS10a+19, + COLS10a+19, + COLS10a+19, + COLS10a+21, // Urdu D - dal /w small tah (688) + COLS10a+21, + COLS10a+21, // Pashto D - dal /w ring (689) forced to combine + COLS10a+21, + COLS10a+23, // Sindhi dh - dal /w 2 dots above (68c) + COLS10a+23, + COLS10a+24, // Sindhi D - dal /w 3 dots above (68e) + COLS10a+24, + COLS10a+25, // Sindhi Dr - dal /w dot below (68a) + COLS10a+25, + COLS10a+26, // Sindhi Dh - dal /w 2 dots below (68d) + COLS10a+26, + COLS10a+28, // Pashto r - ra /w ring (693) + COLS10a+28, + COLS10a+28, // Urdu R - ra /w small tah (691) forced to combine + COLS10a+28, + COLS10a+28, // Sindhi r - ra /w 4 dots above (699) forced to combine + COLS10a+28, + COLS10a+27, // Kurdish rolled r - ra /w 'v' below (695) + COLS10a+27, + COLS10a+27, + COLS10a+27, + COLS10a+30, // Kurdish, Pashto, Farsi, Sindhi, and Urdu Z + COLS10a+30, // = jeh - ra /w 3 dots above (698) + COLS10a+30, // Pashto zz - ra /w dot below & dot above (696) + COLS10a+30, // forced to combine + COLS10a+30, // Pashto g - not in unicode! - forced to combine + COLS10a+30, + COLS10a+33, // Pashto x - seen dot below & above (69a) + COLS10a+33, + COLS10a+33, + COLS10a+33, + COLS10a+39, // Malay ng - old maly ain /w 3 dots above (6a0) + COLS10a+39, // forced to combine + COLS10a+39, + COLS10a+39, + COLS10a+41, // Malay p, Kurdish v - Farsi ? 
- fa /w 3 dots above + COLS10a+41, // = veh - means foreign words (6a4) + COLS10a+41, + COLS10a+41, + COLS10a+41, // Sindhi ph - fa /w 4 dots above (6a6) forced to combine + COLS10a+41, + COLS10a+41, + COLS10a+41, + COLS10a+43, // Misc k - open caf (6a9) + COLS10a+43, + COLS10a+43, + COLS10a+43, + COLS10a+43, // misc k - no unicode - forced to combine + COLS10a+43, + COLS10a+43, + COLS10a+43, + COLS10a+43, // Sindhi k - swash caf (various) (6aa) -forced to combine + COLS10a+43, + COLS10a+43, + COLS10a+43, + COLS10a+44, // Persian/Urdu g - gaf (6af) + COLS10a+44, + COLS10a+44, + COLS10a+44, + COLS10a+44, // Persian/Urdu g - no unicode + COLS10a+44, + COLS10a+44, + COLS10a+44, + COLS10a+44, // malay g - gaf /w ring (6b0) + COLS10a+44, + COLS10a+44, + COLS10a+44, + COLS10a+44, // Sindhi ng - gaf /w 2 dots above (6ba) + COLS10a+44, // forced to combine ng only + COLS10a+44, + COLS10a+44, + COLS10a+45, // Sindhi gg - gaf /w 2 dots vertical below (6b3) + COLS10a+45, + COLS10a+45, + COLS10a+45, + COLS10a+46, // Kurdish velar l - lam /w small v (6b5) + COLS10a+46, + COLS10a+46, + COLS10a+46, + COLS10a+46, // Kurdish Lamalif with diacritic - no unicode + COLS10a+46, + COLS10a+48, // Urdu n - dotless noon (6ba) + COLS10a+48, + COLS10a+48, + COLS10a+48, + COLS10a+48, // Pashto N - noon /w ring (6bc) - forced to combine + COLS10a+48, + COLS10a+48, + COLS10a+48, + COLS10a+48, // Sindhi N - dotless noon/w small tah (6bb) + COLS10a+48, // forced to combine + COLS10a+48, + COLS10a+48, + COLS10a+50, // Kurdish o - waw /w small v (6c6) + COLS10a+50, + COLS10a+50, // Kurdish o - waw /w bar above (6c5) + COLS10a+50, + COLS10a+50, // Kurdish o - waw /w 2 dots above (6ca) + COLS10a+50, + COLS10a+51, // Urdu h - no unicode + COLS10a+51, + COLS10a+51, + COLS10a+51, + COLS10a+52, // Kurdish ˆ - ya /w small v (6ce) + COLS10a+52, + COLS10a+52, + COLS10a+52, + COLS10a+54, // Urdu y - ya barree (6d2) + COLS10a+54, + COLS10a+54, // Malay ny - ya /w 3 dots below (6d1) forced to combine + 
COLS10a+54, + COLS10a+54, + COLS10a+54, + COLS10a+51, // Farsi hamzah - hamzah on ha (6c0) forced to combine + COLS10a+51 +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +typedef struct TBL_B_TO_BP +{ + FLMBYTE key; + FLMBYTE * charPtr; +} TBL_B_TO_BP; + +/**************************************************************************** +Desc: This table describes and gives addresses for collating + character sets. Each line corresponds to a character set. +****************************************************************************/ +TBL_B_TO_BP flmCol60Tbl[] = +{ + {F_CHSASCI, flmAsc60Tbl}, + {F_CHSMUL1, flmMn60Tbl}, + {F_CHSSYM1, flmSym60Tbl}, + {F_CHSGREK, flmGrk60Tbl}, + {F_CHSCYR, flmCyrl60Tbl}, + {0xFF, 0} +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +TBL_B_TO_BP flmHebArabicCol60Tbl[] = +{ + {F_CHSASCI, flmAsc60Tbl}, + {F_CHSMUL1, flmMn60Tbl}, + {F_CHSSYM1, flmSym60Tbl}, + {F_CHSGREK, flmGrk60Tbl}, + {F_CHSHEB, flmHeb60TblA}, + {F_CHSHEB, flmHeb60TblB}, + {F_CHSARB1, flmAr160Tbl}, + {F_CHSARB2, flmAr260Tbl}, + {0xff, 0} +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMUINT16 flmIndexi[] = +{ + 0, + 11, + 14, + 15, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 25, + 26, + 35, + 59 +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMUINT16 flmIndexj[] = +{ + FLM_CA_LANG, // Catalan + FLM_CF_LANG, // Canadian French + FLM_CZ_LANG, // Czech + FLM_SL_LANG, // Slovak + FLM_DE_LANG, // German + FLM_SD_LANG, // Swiss German + FLM_ES_LANG, // Spanish (Spain) + FLM_FR_LANG, // French + 
FLM_NL_LANG, // Netherlands + 0xFFFF, // DK_LANG, Danish + 0xFFFF, // NO_LANG, Norwegian + 0x0063, // c + 0x006c, // l + 0x0197, // l with center dot + 0x0063, // c + 0x0125, // ae digraph + 0x01a7, // oe digraph + 0x0068, // h + 0x0068, // h + 0x006c, // l + 0x0101, // center dot alone + 0x006c, // l + 0x0117, // á (for German) + 0x018b, // ij digraph + 0x0000, // was 'a' - will no longer map 'aa' to a-ring + 0x0000, // was 'a' + FLM_CZ_LANG, + FLM_DK_LANG, + FLM_NO_LANG, + FLM_SL_LANG, + FLM_TK_LANG, + FLM_SU_LANG, + FLM_IS_LANG, + FLM_SV_LANG, + FLM_YK_LANG, + 0x011e, // A diaeresis + 0x011f, // a diaeresis + 0x0122, // A ring + 0x0123, // a ring + 0x0124, // AE diagraph + 0x0125, // ae diagraph + 0x013e, // O diaeresis + 0x013f, // o diaeresis + 0x0146, // U diaeresis + 0x0147, // u diaeresis + 0x0150, // O slash + 0x0151, // o slash + 0x0A3a, // CYRILLIC SOFT SIGN + 0x0A3b, // CYRILLIC soft sign + 0x01ee, // dotless i - turkish + 0x01ef, // dotless I - turkish + 0x0162, // C Hacek/caron + 0x0163, // c Hacek/caron + 0x01aa, // R Hacek/caron + 0x01ab, // r Hacek/caron + 0x01b0, // S Hacek/caron + 0x01b1, // s Hacek/caron + 0x01ce, // Z Hacek/caron + 0x01cf, // z Hacek/caron +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMUINT16 flmValuea[] = +{ + STATE1, + STATE3, + STATE2, + STATE2, + STATE8, + STATE8, + STATE1, + STATE3, + STATE9, + STATE10, // No longer in use + STATE10, // No longer in use + STATE4, + STATE6, + STATE6, + STATE5, + INSTAE, + INSTOE, + AFTERC, + AFTERH, + AFTERL, + STATE7, + STATE6, + INSTSG, // ss for German + INSTIJ, + STATE11, // aa - no longer in use + WITHAA, // aa - no longer in use + START_CZ, // Czech + START_DK, // Danish + START_NO, // Norwegian + START_SL, // Slovak + START_TK, // Turkish + START_SU, // Finnish + START_IS, // Icelandic + START_SV, // Swedish + START_YK, // Ukrainian + + // Single character 
fixups + + COLS9, COLS9, COLS9, COLS9, // US and others + COLS9+1, COLS9+1, COLS9+21, COLS9+21, + COLS9+30, COLS9+30, COLS9+21, COLS9+21, + COLS10+43, COLS10+43, COLS9+12, COLS9+12, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9+45, COLS9+45, COLS9+55, COLS9+55, // DANISH + COLS9+42, COLS9+42, COLS9+53, COLS9+53, + COLS9+30, COLS9+30, COLS9+49, COLS9+49, + COLS10+43, COLS10+43, COLS9+12, COLS9+12, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9, COLS9, COLS9, COLS9, // Icelandic + COLS9+46, COLS9+46, COLS9+50, COLS9+50, + COLS9+30, COLS9+30, COLS9+54, COLS9+54, + COLS10+43, COLS10+43, COLS9+12, COLS9+12, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9, COLS9, COLS9+51, COLS9+51, // Norwegian + COLS9+43, COLS9+43, COLS9+21, COLS9+21, + COLS9+30, COLS9+30, COLS9+47, COLS9+47, + COLS10+43, COLS10+43, COLS9+12, COLS9+12, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9+48, COLS9+48, COLS9+44, COLS9+44, // Finnish/Swedish + COLS9+1, COLS9+1, COLS9+52, COLS9+52, + COLS9+30, COLS9+30, COLS9+21, COLS9+21, + COLS10+43, COLS10+43, COLS9+12, COLS9+12, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9, COLS9, COLS9, COLS9, // Ukrain + COLS9+1, COLS9+1, COLS9+21, COLS9+21, + COLS9+30, COLS9+30, COLS9+21, COLS9+21, + COLS10+48, COLS10+48, COLS9+12, COLS9+12, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9, COLS9, COLS9, COLS9, // Turkish + COLS9+1, COLS9+1, COLS9+21, COLS9+21, + COLS9+30, COLS9+30, COLS9+21, COLS9+21, + COLS9+43, COLS9+43, COLS9+11, COLS9+11, + COLS9+3, COLS9+3, COLS9+25, COLS9+25, + COLS9+27, COLS9+27, COLS9+35, COLS9+35, + + COLS9, COLS9, COLS9, COLS9, // Czech / Slovak + COLS9+1, COLS9+1, COLS9+21, COLS9+21, + COLS9+30, COLS9+30, COLS9+21, COLS9+21, + COLS10+43, COLS10+43, COLS9+12, COLS9+12, + 
COLS9+5, COLS9+5, COLS9+26, COLS9+26, + COLS9+28, COLS9+28, COLS9+36, COLS9+36 +}; + +/**************************************************************************** +Desc: Maps from charset 11 to CS24 (punctuation) (starting from 11,0) +****************************************************************************/ +FLMBYTE From0AToZen[] = +{ + 0, 9, 40, 0x53, // sp ! " # + 0x4f, 0x52, 0x54, 38, // $ % & ' + 0x29, 0x2a, 0x55, 0x3b, // ( ) * + + 3, 0x1d, 4, 0x1e // , - . / +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE From0BToZen[] = +{ + 6, 7, 0x42, 0x40, // : ; < = + 0x43, 8, 0x56 // > ? @ +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE From0CToZen[] = +{ + 0x2d, 0x1f, 0x2e, // [ \ ] + 0x0f, 0x11, 0x0d // ^ _ ` +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE From0DToZen[] = +{ + 0x2f, 0x22, 0x30, 0x20 // { | } ~ +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +FLMBYTE From8ToZen[] = +{ + 0x5e, 0x7e, 0x5f, 0x7f, 0x5f, 0xFF, 0x60, 0x80, + 0x61, 0x81, 0x62, 0x82, 0x63, 0x83, 0x64, 0x84, + 0x65, 0x85, 0x66, 0x86, 0x67, 0x87, 0x68, 0x88, + 0x69, 0x89, 0x6a, 0x8a, 0x6b, 0x8b, 0x6c, 0x8c, + 0x6d, 0x8d, 0x6e, 0x8e, 0x6f, 0x8f, 0x6f, 0xFF, + 0x70, 0x90, 0x71, 0x91, 0x72, 0x92, 0x73, 0x93, + 0x74, 0x94, 0x75, 0x95 +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +static FLMBYTE From11AToZen[] = +{ + 2, // japanese period + 0x35, // left bracket + 
0x36, // right bracket + 0x01, // comma + 0x05 // chuuten +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +static FLMBYTE From11BToZen[] = +{ + 0x51, // wo + 0,2,4,6,8,0x42,0x44,0x46,0x22, // small a i u e o ya yu yo tsu + 0xFF, 1, 3, 5, 7, 9, // dash (x241b) a i u e o + 0x0a, 0x0c, 0x0e, 0x10, 0x12, // ka ki ku ke ko + 0x14, 0x16, 0x18, 0x1a, 0x1c, // sa shi su se so + 0x1e, 0x20, 0x23, 0x25, 0x27, // ta chi tsu te to + 0x29, 0x2a, 0x2b, 0x2c, 0x2d, // na ni nu ne no + 0x2e, 0x31, 0x34, 0x37, 0x3a, // ha hi fu he ho + 0x3d, 0x3e, 0x3f, 0x40, 0x41, // ma mi mu me mo + 0x43, 0x45, 0x47, // ya yu yo + 0x48, 0x49, 0x4a, 0x4b, 0x4c, // ra ri ru re ro + 0x4e, 0x52 // WA N +}; + +/**************************************************************************** +Desc: +****************************************************************************/ +typedef struct BYTE_WORD_TBL +{ + FLMBYTE ByteValue; + FLMUINT16 WordValue; +} BYTE_WORD_TBL; + +/**************************************************************************** +Desc: Convert Zenkaku (double wide) to Hankaku (single wide) + Character set 0x24 maps to single wide chars in other char sets. + This enables collation values to be found on some symbols. + This is also used to convert symbols from hankaku to Zen24. 
+****************************************************************************/ +BYTE_WORD_TBL Zen24ToHankaku[] = +{ + { 0 ,0x0020 }, // space + { 1 ,0x0b03 }, // japanese comma + { 2 ,0x0b00 }, // circle period + { 3 , 44 }, // comma + { 4 , 46 }, // period + { 5 ,0x0b04 }, // center dot + { 6 , 58 }, // colon + { 7 , 59 }, // semicolon + { 8 , 63 }, // question mark + { 9 , 33 }, // exclamation mark + { 10 ,0x0b3d }, // dakuten + { 11 ,0x0b3e }, // handakuten + { 12 ,0x0106 }, // accent mark + { 13 , 96 }, // accent mark + { 14 ,0x0107 }, // umlat + { 15 , 94 }, // caret + { 16 ,0x0108 }, // macron + { 17 , 95 }, // underscore + { 27 ,0x0b0f }, // extend vowel + { 28 ,0x0422 }, // mdash + { 29 , 45 }, // hyphen + { 30 , 47 }, // slash + { 31 ,0x0607 }, // backslash + { 32 , 126 }, // tilde + { 33 ,0x0611 }, // doubleline + { 34 ,0x0609 }, // line + { 37 ,0x041d }, // left apostrophe + { 38 ,0x041c }, // right apostrophe + { 39 ,0x0420 }, // left quote + { 40 ,0x041f }, // right quote + { 41 , 40 }, // left paren + { 42 , 41 }, // right paren + { 45 , 91 }, // left bracket + { 46 , 93 }, // right bracket + { 47 , 123 }, // left curly bracket + { 48 , 125 }, // right curly bracket + { 53 ,0x0b01 }, // left j quote + { 54 ,0x0b02 }, // right j quote + { 59 , 43 }, // plus + { 60 ,0x0600 }, // minus + { 61 ,0x0601 }, // plus/minus + { 62 ,0x0627 }, // times + { 63 ,0x0608 }, // divide + { 64 , 61 }, // equal + { 65 ,0x0663 }, // unequal + { 66 , 60 }, // less + { 67 , 62 }, // greater + { 68 ,0x0602 }, // less/equal + { 69 ,0x0603 }, // greater/equal + { 70 ,0x0613 }, // infinity + { 71 ,0x0666 }, // traingle dots + { 72 ,0x0504 }, // man + { 73 ,0x0505 }, // woman + { 75 ,0x062d }, // prime + { 76 ,0x062e }, // double prime + { 78 ,0x040c }, // yen + { 79 , 36 }, // dollar + { 80 ,0x0413 }, // cent + { 81 ,0x040b }, // pound + { 82 , 37 }, // percent + { 83 , 35 }, // # + { 84 , 38 }, // & + { 85 , 42 }, // * + { 86 , 64 }, // @ + { 87 ,0x0406 }, // squiggle + { 89 
,0x06b8 }, // filled star + { 90 ,0x0425 }, // hollow circle + { 91 ,0x042c }, // filled circle + { 93 ,0x065f }, // hollow diamond + { 94 ,0x0660 }, // filled diamond + { 95 ,0x0426 }, // hollow box + { 96 ,0x042e }, // filled box + { 97 ,0x0688 }, // hollow triangle + { 99 ,0x0689 }, // hollow upside down triangle + { 103,0x0615 }, // right arrow + { 104,0x0616 }, // left arrow + { 105,0x0617 }, // up arrow + { 106,0x0622 }, // down arrow + { 119,0x060f }, + { 121,0x0645 }, + { 122,0x0646 }, + { 123,0x0643 }, + { 124,0x0644 }, + { 125,0x0642 }, // union + { 126,0x0610 }, // intersection + { 135,0x0655 }, + { 136,0x0656 }, + { 138,0x0638 }, // right arrow + { 139,0x063c }, // left/right arrow + { 140,0x067a }, + { 141,0x0679 }, + { 153,0x064f }, // angle + { 154,0x0659 }, + { 155,0x065a }, + { 156,0x062c }, + { 157,0x062b }, + { 158,0x060e }, + { 159,0x06b0 }, + { 160,0x064d }, + { 161,0x064e }, + { 162,0x050e }, // square root + { 164,0x0604 }, + { 175,0x0623 }, // angstrom + { 176,0x044b }, // percent + { 177,0x051b }, // sharp + { 178,0x051c }, // flat + { 179,0x0509 }, // musical note + { 180,0x0427 }, // dagger + { 181,0x0428 }, // double dagger + { 182,0x0405 }, // paragraph + { 187,0x068f } // big hollow circle +}; + +/**************************************************************************** +Desc: Maps CS26 to CharSet 11 +****************************************************************************/ +FLMBYTE MapCS26ToCharSet11[ 86 ] = +{ + 0x06, // 0 a + 0x10, // 1 A + 0x07, // 2 i + 0x11, // 3 I + 0x08, // 4 u + 0x12, // 5 U + 0x09, // 6 e + 0x13, // 7 E + 0x0a, // 8 o + 0x14, // 9 O + 0x15, // 0x0a KA + 0x95, // GA - 21 followed by 0x3D dakuten + 0x16, // 0x0c KI + 0x96, // GI + 0x17, // 0x0e KU + 0x97, // GU + 0x18, // 0x10 KE + 0x98, // GE + 0x19, // 0x12 KO + 0x99, // GO + 0x1a, // 0x14 SA + 0x9a, // ZA + 0x1b, // 0x16 SHI + 0x9b, // JI + 0x1c, // 0x18 SU + 0x9c, // ZU + 0x1d, // 0x1a SE + 0x9d, // ZE + 0x1e, // 0x1c SO + 0x9e, // ZO + 0x1f, // 0x1e 
TA + 0x9f, // DA + 0x20, // 0x20 CHI + 0xa0, // JI + 0x0e, // 0x22 small tsu + 0x21, // 0x23 TSU + 0xa1, // ZU + 0x22, // 0x25 TE + 0xa2, // DE + 0x23, // 0x27 TO + 0xa3, // DO + 0x24, // 0x29 NA + 0x25, // 0x2a NI + 0x26, // 0x2b NU + 0x27, // 0x2c NE + 0x28, // 0x2d NO + 0x29, // 0x2e HA + 0xa9, // 0x2f BA + 0xe9, // 0x30 PA + 0x2a, // 0x31 HI + 0xaa, // 0x32 BI + 0xea, // 0x33 PI + 0x2b, // 0x34 FU + 0xab, // 0x35 BU + 0xeb, // 0x36 PU + 0x2c, // 0x37 HE + 0xac, // 0x38 BE + 0xec, // 0x39 PE + 0x2d, // 0x3a HO + 0xad, // 0x3b BO + 0xed, // 0x3c PO + 0x2e, // 0x3d MA + 0x2f, // 0x3e MI + 0x30, // 0x3f MU + 0x31, // 0x40 ME + 0x32, // 0x41 MO + 0x0b, // 0x42 small ya + 0x33, // 0x43 YA + 0x0c, // 0x44 small yu + 0x34, // 0x45 YU + 0x0d, // 0x46 small yo + 0x35, // 0x47 YO + 0x36, // 0x48 RA + 0x37, // 0x49 RI + 0x38, // 0x4a RU + 0x39, // 0x4b RE + 0x3a, // 0x4c RO + 0xff, // 0x4d small wa + 0x3b, // 0x4e WA + 0xff, // 0x4f WI + 0xff, // 0x50 WE + 0x05, // 0x51 WO + 0x3c, // 0x52 N + 0xff, // 0x53 VU + 0xff, // 0x54 ka + 0xff // 0x55 ke +}; + +/**************************************************************************** +Desc: Kana subcollation values + BIT 0: set if large char + BIT 1: set if voiced + BIT 2: set if half voiced +Notes: To save space should be nibbles +IMPORTANT: The '1' entries that do not have a matching '0' entry have been + changed to zero to save space in the subcollation area. 
+****************************************************************************/ +FLMBYTE flmKanaSubColTbl[] = +{ + 0,1,0,1,0,1,0,1,0,1, // a A i I u U e E o O + 1,3,0,3,0,3,1,3,0,3, // KA GA KI GI KU GU KE GE KO GO + 0,3,0,3,0,3,0,3,0,3, // SA ZA SHI JI SU ZU SE ZE SO ZO + 0,3,0,3,0,1,3,0,3,0,3, // TA DA CHI JI tsu TSU ZU TE DE TO DO + 0,0,0,0,0, // NA NI NU NE NO + 0,3,5,0,3,5,0,3,5, // HA BA PA HI BI PI FU BU PU + 0,3,5,0,3,5, // HE BE PE HO BO PO + 0,0,0,0,0, // MA MI MU ME MO + 0,1,0,1,0,1, // ya YA yu YU yo YO + 0,0,0,0,0, // RA RI RU RE RO + 0,1,0,0,0, // wa WA WI WE WO + 0,3,0,0 // N VU ka ke +}; + /**************************************************************************** Desc: ****************************************************************************/ @@ -144,6 +4332,71 @@ int main( void) f_sprintf( szTmpBuf, "Hello, World! (You're number %u)\n", 1); f_printf( szTmpBuf); + // Make sure the diacritics constants are defined correctly + + f_assert( F_GRAVE == grave); + f_assert( F_CENTERD == centerd); + f_assert( F_TILDE == tilde); + f_assert( F_CIRCUM == circum); + f_assert( F_CROSSB == crossb); + f_assert( F_SLASH == slash); + f_assert( F_ACUTE == acute); + f_assert( F_UMLAUT == umlaut); + f_assert( F_MACRON == macron); + f_assert( F_APOSAB == aposab); + f_assert( F_APOSBES == aposbes); + f_assert( F_APOSBA == aposba); + f_assert( F_RING == ring); + f_assert( F_DOTA == dota); + f_assert( F_DACUTE == dacute); + f_assert( F_CEDILLA == cedilla); + f_assert( F_OGONEK == ogonek); + f_assert( F_CARON == caron); + f_assert( F_STROKE == stroke); + f_assert( F_BREVE == breve); + f_assert( F_DOTLESI == dotlesi); + f_assert( F_DOTLESJ == dotlesj); + f_assert( F_GACUTE == gacute); + f_assert( F_GDIA == gdia); + f_assert( F_GACTDIA == gactdia); + f_assert( F_GGRVDIA == ggrvdia); + f_assert( F_GGRAVE == ggrave); + f_assert( F_GCIRCM == gcircm); + f_assert( F_GSMOOTH == gsmooth); + f_assert( F_GROUGH == grough); + f_assert( F_GIOTA == giota); + f_assert( F_GSMACT == 
gsmact);
+	f_assert( F_GRGACT == grgact);
+	f_assert( F_GSMGRV == gsmgrv);
+	f_assert( F_GRGGRV == grggrv);
+	f_assert( F_GSMCIR == gsmcir);
+	f_assert( F_GRGCIR == grgcir);
+	f_assert( F_GACTIO == gactio);
+	f_assert( F_GGRVIO == ggrvio);
+	f_assert( F_GCIRIO == gcirio);
+	f_assert( F_GSMIO == gsmio);
+	f_assert( F_GRGIO == grgio);
+	f_assert( F_GSMAIO == gsmaio);
+	f_assert( F_GRGAIO == grgaio);
+	f_assert( F_GSMGVIO == gsmgvio);
+	f_assert( F_GRGGVIO == grggvio);
+	f_assert( F_GSMCIO == gsmcio);
+	f_assert( F_GRGCIO == grgcio);
+	f_assert( F_GHPRIME == ghprime);
+	f_assert( F_GLPRIME == glprime);
+	f_assert( F_RACUTE == racute);
+	f_assert( F_RGRAVE == rgrave);
+	f_assert( F_RRTDESC == rrtdesc);
+	f_assert( F_ROGONEK == rogonek);
+	f_assert( F_RMACRON == rmacron);
+
+	// Make sure text stuff is working
+
+	if( RC_BAD( rc = ftkTestText()))
+	{
+		goto Exit;
+	}
+
 	// Run a multi-threaded test to verify the proper operation of
 	// the atomic operations
 
@@ -694,3 +4947,1062 @@ FSTATIC FLMBYTE ftkSlowPacketChecksum(
 
 	return ((FLMBYTE) uiChecksum);
 }
+
+/****************************************************************************
+Desc:	Compares each f_wp*/f_* text routine with its counterpart (flmCh6*, flmIsUpper, flmGetCollation, ZenToHankaku, HanToZenkaku, a WP_UTOWP60 search); stops with NE_FLM_FAILURE at the first mismatch.
+****************************************************************************/
+RCODE ftkTestText( void)
+{
+	RCODE	rc = NE_FLM_OK;
+	FLMBOOL	bNotFound;
+	FLMBOOL	bNotFound2;
+	FLMBOOL	bConverted;
+	FLMUINT	uiLoop;
+	FLMUINT	uiSubloop;
+	FLMUINT	uiStartTime;
+	FLMUINT	uiSlowTime;
+	FLMUINT	uiFastTime;
+	FLMUINT16	ui16WpChar;
+	FLMUINT16	ui16BaseChar = 0;		// Zeroed: flmCh6Brkcar leaves its outputs untouched on "not found", yet they are compared below
+	FLMUINT16	ui16BaseChar2 = 0;
+	FLMUINT16	ui16DiacriticChar = 0;
+	FLMUINT16	ui16DiacriticChar2 = 0;
+	FLMUINT16	ui16Cmb = 0;			// Zeroed: flmCh6Cmbcar leaves *pui16WpChar untouched on "not found"
+	FLMUINT16	ui16Cmb2 = 0;
+	FLMUNICODE	uzUniChar;
+	FLMUINT16	ui16TmpWpChar;
+
+	f_printf( "Running case conversion tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0xFFFF; uiLoop++)
+	{
+		ui16WpChar = (FLMUINT16)uiLoop;
+
+		if( flmCh6Upper( ui16WpChar) != f_wpUpper( ui16WpChar) ||
+			 flmIsUpper( ui16WpChar) != f_wpIsUpper( ui16WpChar) ||
+			 flmCh6Lower( ui16WpChar) != f_wpLower( ui16WpChar))
+		{
+			f_printf( "Case conversion failed on character 0x%04X.\n",
+				(unsigned)ui16WpChar);
+			rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+			goto Exit;
+		}
+	}
+
+	f_printf( "Done.\n");
+	f_printf( "Running character collation tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0xFFFF; uiLoop++)
+	{
+		FLMUINT	uiLanguage;
+		ui16WpChar = (FLMUINT16)uiLoop;		// reuse the function-scope variable instead of shadowing it
+
+		for( uiLanguage = 0; uiLanguage < FLM_LAST_LANG; uiLanguage++)
+		{
+			if( f_wpGetCollation( ui16WpChar, uiLanguage) !=
+				 flmGetCollation( ui16WpChar, uiLanguage))
+			{
+				f_printf( "Character collation failed on character 0x%04X.\n",
+					(unsigned)ui16WpChar);
+				rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+				goto Exit;
+			}
+		}
+	}
+
+	f_printf( "Done.\n");
+	f_printf( "Running US character collation timing tests ... ");
+
+	uiStartTime = FLM_GET_TIMER();
+
+	for( uiLoop = 0; uiLoop < 100; uiLoop++)
+	{
+		for( uiSubloop = 0; uiSubloop <= 0xFFFF; uiSubloop++)
+		{
+			flmGetCollation( (FLMUINT16)uiSubloop, FLM_US_LANG);
+		}
+	}
+
+	uiSlowTime = FLM_ELAPSED_TIME( FLM_GET_TIMER(), uiStartTime);
+	uiStartTime = FLM_GET_TIMER();
+
+	for( uiLoop = 0; uiLoop < 100; uiLoop++)
+	{
+		for( uiSubloop = 0; uiSubloop <= 0xFFFF; uiSubloop++)
+		{
+			f_wpGetCollation( (FLMUINT16)uiSubloop, FLM_US_LANG);
+		}
+	}
+
+	uiFastTime = FLM_ELAPSED_TIME( FLM_GET_TIMER(), uiStartTime);
+
+	f_printf( "Slow time = %u, Fast time = %u. Done.\n",
+		(unsigned)uiSlowTime, (unsigned)uiFastTime);
+
+	f_printf( "Running Zenkaku to Hankaku conversion tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0xFFFF; uiLoop++)
+	{
+		FLMUINT16	ui16DakutenOrHandakuten = 0;	// zeroed so the comparison below never reads an unwritten value
+		FLMUINT16	ui16DakutenOrHandakuten2 = 0;
+		ui16WpChar = (FLMUINT16)uiLoop;
+
+		if( (ZenToHankaku( ui16WpChar, &ui16DakutenOrHandakuten) !=
+			  f_wpZenToHankaku( ui16WpChar, &ui16DakutenOrHandakuten2)) ||
+			 ui16DakutenOrHandakuten != ui16DakutenOrHandakuten2)
+		{
+			f_printf( "Zenkaku to Hankaku conversion failed on character 0x%04X.\n",
+				(unsigned)ui16WpChar);
+			rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+			goto Exit;
+		}
+	}
+
+	f_printf( "Done.\n");
+	f_printf( "Running Hankaku to Zenkaku conversion tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0xFFFF; uiLoop++)
+	{
+		FLMUINT16	ui16Zenkaku = 0;
+		FLMUINT16	ui16Zenkaku2 = 0;
+		FLMUINT	uiNextWpChar;
+		ui16WpChar = (FLMUINT16)uiLoop;
+
+		for( uiNextWpChar = 0x0B3D; uiNextWpChar <= 0x0B3E; uiNextWpChar++)
+		{
+			if( (HanToZenkaku( ui16WpChar, (FLMUINT16)uiNextWpChar, &ui16Zenkaku) !=
+				  f_wpHanToZenkaku( ui16WpChar, (FLMUINT16)uiNextWpChar, &ui16Zenkaku2)) ||
+				 ui16Zenkaku != ui16Zenkaku2)
+			{
+				f_printf( "Hankaku to Zenkaku conversion failed on character 0x%04X.\n",
+					(unsigned)ui16WpChar);
+				rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+				goto Exit;
+			}
+		}
+	}
+
+	f_printf( "Done.\n");
+	f_printf( "Running Unicode to WP conversion tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0xFFFF; uiLoop++)
+	{
+		uzUniChar = (FLMUINT16)uiLoop;
+		bConverted = FALSE;
+
+		if( uzUniChar < 127)
+		{
+			ui16WpChar = uzUniChar;
+			bConverted = TRUE;
+		}
+		else
+		{
+			FLMINT16	ui16Min;			// signed: ui16Max may step below zero to end the search
+			FLMINT16	ui16Max;
+			FLMINT16	ui16Temp;
+			FLMUINT16 *	pui16Table = (FLMUINT16 *) WP_UTOWP60;	// read as consecutive (Unicode, WP) FLMUINT16 pairs
+			FLMUINT16	ui16TblChr;
+
+			ui16Min = 0;
+			ui16Max = 1020;			// highest pair index the binary search will probe
+			ui16WpChar = 0;
+
+			do
+			{
+				ui16Temp = (ui16Min + ui16Max) >> 1;
+				ui16TblChr = *(pui16Table + (ui16Temp * 2));
+
+				if( ui16TblChr < uzUniChar)
+				{
+					ui16Min = ui16Temp + 1;
+				}
+				else if( ui16TblChr > uzUniChar)
+				{
+					ui16Max = ui16Temp - 1;
+				}
+				else
+				{
+					ui16WpChar = *(pui16Table + (ui16Temp * 2) + 1);
+					bConverted = TRUE;
+					break;
+				}
+			} while( ui16Min <= ui16Max);
+		}
+
+		if( f_depricatedUnicodeToWP( uzUniChar, &ui16TmpWpChar) != bConverted ||
+			 ui16WpChar != ui16TmpWpChar)
+		{
+			f_printf( "Conversion failed on character 0x%04X.\n",
+				(unsigned)uzUniChar);
+			rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+			goto Exit;
+		}
+	}
+
+	f_printf( "Done.\n");
+	f_printf( "Running character break tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0xFFFF; uiLoop++)
+	{
+		ui16WpChar = (FLMUINT16)uiLoop;
+		bNotFound = flmCh6Brkcar( ui16WpChar, &ui16BaseChar, &ui16DiacriticChar);
+		bNotFound2 = f_breakWPChar( ui16WpChar, &ui16BaseChar2, &ui16DiacriticChar2);
+
+		if( bNotFound != bNotFound2 || ui16BaseChar != ui16BaseChar2 ||
+			 ui16DiacriticChar != ui16DiacriticChar2)
+		{
+			f_printf( "Diacritic test failed on character 0x%04X.\n",
+				(unsigned)ui16WpChar);
+			rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+			goto Exit;
+		}
+	}
+
+	f_printf( "Done.\n");
+	f_printf( "Running character combine tests ... ");
+
+	for( uiLoop = 0; uiLoop <= 0x0FFF; uiLoop++)
+	{
+		for( uiSubloop = 0; uiSubloop < 0x0FFF; uiSubloop++)
+		{
+			bNotFound = flmCh6Cmbcar( &ui16Cmb, (FLMUINT16)uiLoop, (FLMUINT16)uiSubloop);
+			bNotFound2 = f_combineWPChar( &ui16Cmb2, (FLMUINT16)uiLoop, (FLMUINT16)uiSubloop);
+
+			if( bNotFound != bNotFound2 || ui16Cmb != ui16Cmb2)
+			{
+				f_printf( "Diacritic test failed on character 0x%04X + 0x%04X.\n",
+					(unsigned)uiLoop, (unsigned)uiSubloop);
+				rc = RC_SET_AND_ASSERT( NE_FLM_FAILURE);
+				goto Exit;
+			}
+		}
+	}
+
+	f_printf( "Done.\n");
+
+Exit:
+
+	return( rc);
+}
+
+/****************************************************************************
+Desc:	Break a WP character into a base and a diacritical char.
+Ret:		TRUE - if not found (outputs are left untouched) FALSE - if found
+****************************************************************************/
+FLMBOOL flmCh6Brkcar(
+	FLMUINT16	ui16WpChar,
+	FLMUINT16 *	pui16BaseChar,
+	FLMUINT16 *	pui16DiacriticChar)
+{
+	BASE_DIACRIT *	pBaseDiacritic;
+	FLMINT	iTableIndex;
+
+	if( HI( ui16WpChar) >= F_NCHSETS ||
+		 (pBaseDiacritic = flm_car60_c[HI( ui16WpChar)]) == 0)
+	{
+		return (TRUE);
+	}
+
+	iTableIndex = ((FLMBYTE) ui16WpChar) - pBaseDiacritic->start_char;
+
+	if( iTableIndex < 0 ||
+		 iTableIndex > pBaseDiacritic->char_count ||		// NOTE(review): index == char_count passes this test - confirm the table really has an entry there
+		 pBaseDiacritic->table[iTableIndex].base == (FLMBYTE) 0xFF)
+	{
+		return (TRUE);
+	}
+
+	if( (HI( ui16WpChar) != F_CHSMUL1) ||
+		 ((flm_ml1_cb60[((FLMBYTE) ui16WpChar) >> 3] >>
+			(7 - (ui16WpChar & 0x07))) & 0x01))
+	{
+
+		// normal case: the base is in the same character set as the character
+
+		*pui16BaseChar = (ui16WpChar & 0xFF00) |
+								pBaseDiacritic->table[iTableIndex].base;
+
+		*pui16DiacriticChar = (ui16WpChar & 0xFF00) |
+								pBaseDiacritic->table[iTableIndex].diacrit;
+	}
+	else
+	{
+
+		// Multi-national where base is ascii value.
+
+		*pui16BaseChar = pBaseDiacritic->table[iTableIndex].base;
+		*pui16DiacriticChar = (ui16WpChar & 0xFF00) |
+									pBaseDiacritic->table[iTableIndex].diacrit;
+	}
+
+	return( FALSE);
+}
+
+/****************************************************************************
+Desc:	Take a base and a diacritic and compose a WP character. An ASCII base (character set 0) is looked up in multinational 1.
+Ret:		TRUE - if not found (*pui16WpChar is left untouched) FALSE - if found
+****************************************************************************/
+FLMBOOL flmCh6Cmbcar(
+	FLMUINT16 *	pui16WpChar,
+	FLMUINT16	ui16BaseChar,
+	FLMINT16	ui16DiacriticChar)
+{
+	FLMUINT	uiRemaining;
+	FLMBYTE	ucCharSet;
+	FLMBYTE	ucChar;
+	BASE_DIACRIT *	pBaseDiacritic;
+	BASE_DIACRIT_TABLE *	pTable;
+
+	ucCharSet = HI( ui16BaseChar);
+	if( ucCharSet >= F_NCHSETS)		// same bound flmCh6Brkcar applies before indexing flm_car60_c
+	{
+		return (TRUE);
+	}
+
+	// Is base ASCII? If so, look in multinational 1
+
+	if( !ucCharSet)
+	{
+		ucCharSet = F_CHSMUL1;
+	}
+
+	if( (pBaseDiacritic = flm_car60_c[ucCharSet]) == 0)
+	{
+		return (TRUE);
+	}
+
+	ucChar = LO( ui16BaseChar);
+	ui16DiacriticChar = LO( ui16DiacriticChar);
+	pTable = pBaseDiacritic->table;
+
+	for( uiRemaining = pBaseDiacritic->char_count;
+		  uiRemaining;
+		  uiRemaining--, pTable++)
+	{
+		// Same base and same diacritic (bit 7 of the table's diacrit is masked off)?
+
+		if( pTable->base == ucChar &&
+			 (pTable->diacrit & 0x7F) == ui16DiacriticChar)
+		{
+
+			// Match: high byte is the character set, low byte is start_char + entry index
+
+			*pui16WpChar = (FLMUINT16) (((FLMUINT16) ucCharSet << 8) +
+									(pBaseDiacritic->start_char +
+									(FLMUINT16) (pTable - pBaseDiacritic->table)));
+			return (FALSE);
+		}
+	}
+
+	return (TRUE);
+}
+
+/****************************************************************************
+Desc:	Converts a character to upper case (if possible); any other character is returned unchanged
+****************************************************************************/
+FLMUINT16 flmCh6Upper(
+	FLMUINT16	ui16WpChar)
+{
+	if (ui16WpChar < 256)
+	{
+		if (ui16WpChar >= ASCII_LOWER_A && ui16WpChar <= ASCII_LOWER_Z)
+		{
+
+			// Return ASCII upper case
+
+			return (ui16WpChar & 0xdf);
+		}
+	}
+	else
+	{
+		FLMBYTE	ucCharSet = (FLMBYTE)(ui16WpChar >> 8);
+
+		if (ucCharSet == F_CHSMUL1)
+		{
+			FLMBYTE	ucChar = (FLMBYTE)(ui16WpChar & 0xFF);
+
+			if (ucChar >= flmCaseConvertableRange[(F_CHSMUL1 - 1) * 2] &&
+				 ucChar <= flmCaseConvertableRange[((F_CHSMUL1 - 1) * 2) + 1])
+			{
+				return (ui16WpChar & 0xFFFE);
+			}
+		}
+		else if (ucCharSet == F_CHSGREK)
+		{
+			if ((ui16WpChar & 0xFF) <= flmCaseConvertableRange[
+					((F_CHSGREK - 1) * 2) + 1])
+			{
+				return (ui16WpChar & 0xFFFE);
+			}
+		}
+		else if (ucCharSet == F_CHSCYR)
+		{
+			if ((ui16WpChar & 0xFF) <= flmCaseConvertableRange[
+					((F_CHSCYR - 1) * 2) + 1])
+			{
+				return (ui16WpChar & 0xFFFE);
+			}
+		}
+		else if (ui16WpChar >= Lower_JP_a)
+		{
+
+			// Possible double byte character set alphabetic character?
+
+			if (ui16WpChar <= Lower_JP_z)
+			{
+
+				// Japanese?
+
+				ui16WpChar = (ui16WpChar - Lower_JP_a) + Upper_JP_A;
+			}
+			else if (ui16WpChar >= Lower_KR_a && ui16WpChar <= Lower_KR_z)
+			{
+
+				// Korean?
+
+				ui16WpChar = (ui16WpChar - Lower_KR_a) + Upper_KR_A;
+			}
+			else if (ui16WpChar >= Lower_CS_a && ui16WpChar <= Lower_CS_z)
+			{
+
+				// Chinese Simplified?
+
+				ui16WpChar = (ui16WpChar - Lower_CS_a) + Upper_CS_A;
+			}
+			else if (ui16WpChar >= Lower_CT_a && ui16WpChar <= Lower_CT_z)
+			{
+
+				// Chinese Traditional?
+
+				ui16WpChar = (ui16WpChar - Lower_CT_a) + Upper_CT_A;
+			}
+		}
+	}
+
+	// Return original character - original not in lower case.
+
+	return (ui16WpChar);
+}
+
+/****************************************************************************
+Desc:	Checks to see if WP character is upper case; characters outside the ranges tested below are reported as upper case
+****************************************************************************/
+FLMBOOL flmIsUpper(
+	FLMUINT16	ui16WpChar)
+{
+	FLMBYTE	ucChar;
+	FLMBYTE	ucCharSet;
+
+	// Get character
+
+	ucChar = (FLMBYTE) (ui16WpChar & 0xFF);
+
+	// Test if ASCII character set
+
+	if (!(ui16WpChar & 0xFF00))
+	{
+		return ((ucChar >= ASCII_LOWER_A && ucChar <= ASCII_LOWER_Z)
+						? FALSE
+						: TRUE);
+	}
+
+	// Get the character set
+
+	ucCharSet = (FLMBYTE) (ui16WpChar >> 8);
+
+	if ((ucCharSet == F_CHSMUL1 && ucChar >= 26 && ucChar <= 241) ||
+		 (ucCharSet == F_CHSGREK && ucChar <= 69) ||
+		 (ucCharSet == F_CHSCYR && ucChar <= 199))
+	{
+		return ((ucChar & 1) ? FALSE : TRUE);
+	}
+
+	// Don't care that double ss is lower
+
+	return (TRUE);
+}
+
+/****************************************************************************
+Desc:	Converts a character to lower case (if possible)
+****************************************************************************/
+FLMUINT16 flmCh6Lower(
+	FLMUINT16	ui16WpChar)
+{
+	if (ui16WpChar < 256)
+	{
+		if (ui16WpChar >= ASCII_UPPER_A && ui16WpChar <= ASCII_UPPER_Z)
+		{
+			return (ui16WpChar | 0x20);
+		}
+	}
+	else
+	{
+		FLMBYTE	ucCharSet = (FLMBYTE)(ui16WpChar >> 8);
+
+		if (ucCharSet == F_CHSMUL1)
+		{
+			FLMBYTE	ucChar = (FLMBYTE)(ui16WpChar & 0xFF);
+
+			if (ucChar >= flmCaseConvertableRange[(F_CHSMUL1 - 1) * 2] &&
+				 ucChar <= flmCaseConvertableRange[((F_CHSMUL1 - 1) * 2) + 1])
+			{
+				return (ui16WpChar | 1);
+			}
+		}
+		else if (ucCharSet == F_CHSGREK)
+		{
+			if ((ui16WpChar & 0xFF) <= flmCaseConvertableRange[
+					((F_CHSGREK - 1) * 2) + 1])
+			{
+				return (ui16WpChar | 1);
+			}
+		}
+		else if (ucCharSet == F_CHSCYR)
+		{
+			if ((ui16WpChar & 0xFF) <= flmCaseConvertableRange[
+ ((F_CHSCYR - 1) * 2) + 1]) + { + return (ui16WpChar | 1); + } + } + else if (ui16WpChar >= Upper_JP_A) + { + + // Possible double byte character set alphabetic character? + + if (ui16WpChar <= Upper_JP_Z) + { + + // Japanese? + + ui16WpChar = ui16WpChar - Upper_JP_A + Lower_JP_a; + } + else if (ui16WpChar >= Upper_KR_A && ui16WpChar <= Upper_KR_Z) + { + + // Korean? + + ui16WpChar = ui16WpChar - Upper_KR_A + Lower_KR_a; + } + else if (ui16WpChar >= Upper_CS_A && ui16WpChar <= Upper_CS_Z) + { + + // Chinese Simplified? + + ui16WpChar = ui16WpChar - Upper_CS_A + Lower_CS_a; + } + else if (ui16WpChar >= Upper_CT_A && ui16WpChar <= Upper_CT_Z) + { + + // Chinese Traditional? + + ui16WpChar = ui16WpChar - Upper_CT_A + Lower_CT_a; + } + } + } + + // Return original character, original not in upper case + + return (ui16WpChar); +} + +/**************************************************************************** +Desc: getNextCharState can be thought of as a 2 dimentional array with + i and j as the row and column indicators respectively. If a value + exists at the intersection of i and j, it is returned. Sparse array + techniques are used to minimize memory usage. +****************************************************************************/ +FINLINE FLMUINT16 getNextCharState( + FLMUINT i, + FLMUINT j) +{ + FLMUINT k; + FLMUINT x; + + for( k = flmIndexi[ x = (i > START_COL) ? (START_ALL) : i ]; + k <= (FLMUINT) (flmIndexi[ x + 1] - 1); + k++ ) + { + if( j == flmIndexj[ k]) + { + return( flmValuea[ (i > START_COL) + ? 
(k + (FIXUP_AREA_SIZE * (i - START_ALL))) + : k]); + } + } + + return(0); +} + +/************************************************************************** +Desc: Find the collating value of a WP character +Ret: Collating value (COLS0 is high value - undefined WP char) +***************************************************************************/ +FLMUINT16 flmGetCollation( + FLMUINT16 ui16WpChar, + FLMUINT uiLanguage) +{ + FLMUINT16 ui16State; + FLMBYTE ucCharVal; + FLMBYTE ucCharSet; + FLMBOOL bHebrewArabicFlag = FALSE; + TBL_B_TO_BP * pColTbl = flmCol60Tbl; + + // State ONLY for non-US + + if (uiLanguage != FLM_US_LANG) + { + if (uiLanguage == FLM_AR_LANG || // Arabic + uiLanguage == FLM_FA_LANG || // Farsi - persian + uiLanguage == FLM_HE_LANG || // Hebrew + uiLanguage == FLM_UR_LANG) // Urdu + { + pColTbl = flmHebArabicCol60Tbl; + bHebrewArabicFlag = TRUE; + } + else + { + + // Check if uiLanguage candidate for alternate double collating + + ui16State = getNextCharState( START_COL, uiLanguage); + if (0 != (ui16State = getNextCharState( (ui16State + ? ui16State // look at special case languages + : START_ALL), // look at US and European + (FLMUINT) ui16WpChar))) + { + return( ui16State); + } + } + } + + ucCharVal = (FLMBYTE)ui16WpChar; + ucCharSet = (FLMBYTE)(ui16WpChar >> 8); + + // This is an optimized version of f_b_bp_citrp() inline for performance + + do + { + if (pColTbl->key == ucCharSet) + { + FLMBYTE * pucColVals; + + pucColVals = pColTbl->charPtr; + + // Above lower range of table? + + if (ucCharVal >= *pucColVals) + { + + // Make value zero based to index + + ucCharVal = ucCharVal - *pucColVals++; + + // Below maximum number of table entries? + + if (ucCharVal < *pucColVals++) + { + + // Return collated value. 
+ + return( pucColVals[ ucCharVal]); + } + } + } + + // Go to next table entry + + pColTbl++; + } while (pColTbl->key != 0xFF); + + if (bHebrewArabicFlag) + { + if (ucCharSet == F_CHSHEB || + ucCharSet == F_CHSARB1 || + ucCharSet == F_CHSARB2) + { + + // Same as COLS0_HEBREW + + return( COLS0_ARABIC); + } + } + + // Defaults for characters that don't have a collation value. + + return( COLS0); +} + +/**************************************************************************** +Desc: Convert a WPChar from hankaku (single wide) to zenkaku (double wide). +Ret: 0 = no conversion + 1 = converted character to zenkaku + 2 = ui16NextWpChar dakuten or handakuten voicing got combined +****************************************************************************/ +FLMUINT16 HanToZenkaku( + FLMUINT16 ui16WpChar, + FLMUINT16 ui16NextWpChar, + FLMUINT16 * pui16Zenkaku) +{ + FLMUINT16 ui16Zenkaku = 0; + FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChar >> 8); + FLMBYTE ucCharVal = (FLMBYTE)(ui16WpChar & 0xFF); + FLMUINT uiLoop; + FLMUINT16 ui16CharsUsed = 1; + + switch (ucCharSet) + { + + // Character set 0 - symbols + + case 0: + { + + // Invalid? - all others are used. 
+ + if (ucCharVal < 0x20) + { + ; + } + else if (ucCharVal <= 0x2F) + { + + // Symbols A + + ui16Zenkaku = 0x2400 + From0AToZen[ucCharVal - 0x20]; + } + else if (ucCharVal <= 0x39) + { + + // 0..9 + + ui16Zenkaku = 0x2500 + (ucCharVal - 0x21); + } + else if (ucCharVal <= 0x40) + { + + // Symbols B + + ui16Zenkaku = 0x2400 + From0BToZen[ucCharVal - 0x3A]; + } + else if (ucCharVal <= 0x5A) + { + + // A..Z + + ui16Zenkaku = 0x2500 + (ucCharVal - 0x21); + } + else if (ucCharVal <= 0x60) + { + + // Symbols C + + ui16Zenkaku = 0x2400 + From0CToZen[ucCharVal - 0x5B]; + } + else if (ucCharVal <= 0x7A) + { + + // a..z + + ui16Zenkaku = 0x2500 + (ucCharVal - 0x21); + } + else if (ucCharVal <= 0x7E) + { + + // Symbols D + + ui16Zenkaku = 0x2400 + From0DToZen[ucCharVal - 0x7B]; + } + break; + } + + // GREEK + + case 8: + { + if ((ucCharVal >= sizeof(From8ToZen)) || + ((ui16Zenkaku = 0x2600 + From8ToZen[ucCharVal]) == 0x26FF)) + { + ui16Zenkaku = 0; + } + break; + } + + // CYRILLIC + + case 10: + { + + // Check range + + ui16Zenkaku = 0x2700 + (ucCharVal >> 1); // Uppercase value + + // Convert to lower case? + + if (ucCharVal & 0x01) + { + ui16Zenkaku += 0x30; + } + break; + } + + // JAPANESE + + case 11: + { + if (ucCharVal < 5) + { + ui16Zenkaku = 0x2400 + From11AToZen[ucCharVal]; + } + else if (ucCharVal < 0x3D) // katakana? + { + if ((ui16Zenkaku = 0x2600 + From11BToZen[ucCharVal - 5]) == 0x26FF) + { + + // Dash - convert to this + + ui16Zenkaku = 0x241b; + } + else + { + if (ui16NextWpChar == 0xB3D) // dakuten? - voicing + { + + // First check exception(s) then check if voicing + // exists! + + if ((ui16Zenkaku != 0x2652) && // is not 'N'? + (flmKanaSubColTbl[ui16Zenkaku - 0x2600 + 1] == 3)) + { + ui16Zenkaku++; + + // Return 2 + + ui16CharsUsed++; + } + } + else if (ui16NextWpChar == 0xB3E) // handakuten? - voicing + { + + // Check if voicing exists! 
- will NOT access out of + // table + + if (flmKanaSubColTbl[ui16Zenkaku - 0x2600 + 2] == 5) + { + ui16Zenkaku += 2; + + // Return 2 + + ui16CharsUsed++; + } + } + } + } + else if (ucCharVal == 0x3D) // dakuten? + { + + // Convert to voicing symbol + + ui16Zenkaku = 0x240A; + } + else if (ucCharVal == 0x3E) // handakuten? + { + + // Convert to voicing symbol + + ui16Zenkaku = 0x240B; + } + + // else cannot convert + + break; + } + + // Other character sets CS 1,4,5,6 - symbols + + default: + { + + // Instead of includes more tables from char.asm - look down the + // Zen24Tohankaku[] table for a matching value - not much slower. + + for (uiLoop = 0; + uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL)); + uiLoop++) + { + if (Zen24ToHankaku[uiLoop].WordValue == ui16WpChar) + { + ui16Zenkaku = 0x2400 + Zen24ToHankaku[uiLoop].ByteValue; + break; + } + } + break; + } + } + + if (!ui16Zenkaku) + { + + // Change return value + + ui16CharsUsed = 0; + } + + *pui16Zenkaku = ui16Zenkaku; + return (ui16CharsUsed); +} + +/**************************************************************************** +Desc: Convert a zenkaku (double wide) char to a hankaku (single wide) char +Ret: Hankaku char or 0 if a conversion doesn't exist +****************************************************************************/ +FLMUINT16 ZenToHankaku( + FLMUINT16 ui16WpChar, + FLMUINT16 * DakutenOrHandakutenRV) +{ + FLMUINT16 ui16Hankaku = 0; + FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChar >> 8); + FLMBYTE ucCharVal = (FLMBYTE)(ui16WpChar & 0xFF); + FLMUINT uiLoop; + + switch (ucCharSet) + { + + // SYMBOLS + + case 0x24: + { + for (uiLoop = 0; + uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL)); + uiLoop++) + { + + // List is sorted so table entry is more you are done + + if (Zen24ToHankaku[uiLoop].ByteValue >= ucCharVal) + { + if (Zen24ToHankaku[uiLoop].ByteValue == ucCharVal) + { + ui16Hankaku = Zen24ToHankaku[uiLoop].WordValue; + } + break; + } + } + break; + } + + // ROMAN - 0x250F..2559 
Hiragana - 0x255E..2580 + + case 0x25: + { + if (ucCharVal >= 0x0F && ucCharVal < 0x5E) + { + ui16Hankaku = ucCharVal + 0x21; + } + break; + } + + // Katakana - 0x2600..2655 Greek - 0x265B..2695 + + case 0x26: + { + if (ucCharVal <= 0x55) // Katakana range + { + FLMBYTE ucCS11CharVal; + FLMUINT16 ui16NextWpChar = 0; + + if ((ucCS11CharVal = MapCS26ToCharSet11[ucCharVal]) != 0xFF) + { + if (ucCS11CharVal & 0x80) + { + if (ucCS11CharVal & 0x40) + { + + // Handakuten voicing + + ui16NextWpChar = 0xB3E; + } + else + { + + // Dakuten voicing + + ui16NextWpChar = 0xB3D; + } + + ucCS11CharVal &= 0x3F; + } + + ui16Hankaku = 0x0b00 + ucCS11CharVal; + if (ui16NextWpChar && DakutenOrHandakutenRV) + { + *DakutenOrHandakutenRV = ui16NextWpChar; + } + } + } + else if (ucCharVal <= 0x95) // Greek + { + FLMBYTE ucGreekChar = ucCharVal; + + // Make a zero based number. + + ucGreekChar -= 0x5E; + + // Check for lowercase + + if (ucGreekChar >= 0x20) + { + + // Convert to upper case for now + + ucGreekChar -= 0x20; + } + + if (ucGreekChar >= 2) + { + ucGreekChar++; + } + + if (ucGreekChar >= 19) + { + ucGreekChar++; + } + + // Convert to character set 8 + + ui16Hankaku = (ucGreekChar << 1) + 0x800; + if (ucCharVal >= (0x5E + 0x20)) + { + + // Adjust to lower case character + + ui16Hankaku++; + } + } + break; + } + + // Cyrillic + + case 0x27: + { + + // Uppercase? + + if (ucCharVal <= 0x20) + { + ui16Hankaku = (ucCharVal << 1) + 0xa00; + } + else if (ucCharVal >= 0x30 && ucCharVal <= 0x50) + { + + // Lower case + + ui16Hankaku = ((ucCharVal - 0x30) << 1) + 0xa01; + } + break; + } + } + + return (ui16Hankaku); +}