//-------------------------------------------------------------------------
// Desc:	Collation for Asian languages.
// Tabs:	3
//
//		Copyright (c) 1991-1992,1994-2001,2003,2005-2006 Novell, Inc. All Rights Reserved.
//
//		This program is free software; you can redistribute it and/or
//		modify it under the terms of version 2 of the GNU General Public
//		License as published by the Free Software Foundation.
//
//		This program is distributed in the hope that it will be useful,
//		but WITHOUT ANY WARRANTY; without even the implied warranty of
//		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//		GNU General Public License for more details.
//
//		You should have received a copy of the GNU General Public License
//		along with this program; if not, contact Novell, Inc.
//
//		To contact Novell about this file by physical or electronic mail,
//		you may find current contact information at www.novell.com
//
// $Id: fwpasia.cpp 12301 2006-01-19 15:02:55 -0700 (Thu, 19 Jan 2006) dsanders $
//-------------------------------------------------------------------------

#include "flaimsys.h"

// Bits returned through pucCaseBits by fwpAsiaGetCollation().
// SET_CASE_BIT and SET_KATAKANA_BIT deliberately share bit 0: the bit is
// set for uppercase on the Latin/Greek/Cyrillic path and for katakana on
// the kana path.
#define	SET_CASE_BIT			0x01
#define	SET_KATAKANA_BIT		0x01
#define	SET_WIDTH_BIT			0x02

// Collation values at or above this are the Asian marks (dakuten,
// handakuten, chuuten - see fwp_Ch24ColTbl); kana collation values
// lie in 0x0100..(COLS_ASIAN_MARKS - 1).
#define	COLS_ASIAN_MARKS		0x140

extern FLMBYTE		fwp_dia60Tbl[];			/* Diacritic conversions */

/**----------------------------------------------
***  Tables
***  The tables below were taken from the
***  following files:
***		XCH2COL.ASM
***		CMPWS.ASM - k_diac (KanaSubColTbl[])
***---------------------------------------------*/

/**---------------------------------------------
***  Map special chars in CharSet (x24) to
***  collation values
***
***  ByteValue is the character value within
***  character set 0x24, WordValue is the
***  collation value.  fwpAsiaGetCollation()
***  searches this table linearly and, when the
***  collation value is below 0x100, stores the
***  entry's position + 1 as the sub-collation
***  value.
***--------------------------------------------*/
BYTE_WORD_TBL fwp_Ch24ColTbl[] =	/* Position in the table+1 is subColValue */
{
	{1,	COLLS+2},					/* comma */
	{2,	COLLS+1},					/* maru */
	{5,	COLS_ASIAN_MARKS+2},		/* chuuten */
	{10,	COLS_ASIAN_MARKS},		/* dakuten */
	{11,	COLS_ASIAN_MARKS+1},		/* handakuten */
	{43,	COLS2+2},					/* angled brackets */
	{44,	COLS2+3},
	{49,	COLS2+2},					/* pointy brackets */
	{50,	COLS2+3},
	{51,	COLS2+2},					/* double pointy brackets */
	{52,	COLS2+3},
	{53,	COLS1},						/* Japanese quotes */
	{54,	COLS1},
	{55,	COLS1},						/* hollow Japanese quotes */
	{56,	COLS1},
	{57,	COLS2+2},					/* filled rounded brackets */
	{58,	COLS2+3}
};

/**-------------------------------------
***  Kana subcollation values
***		BIT 0: set if large char
***		BIT 1: set if voiced
***		BIT 2: set if half voiced
***  Indexed by the character value in
***  character set 0x26 (katakana); the
***  table holds 86 entries (0x00..0x55).
***  HanToZenkaku() also reads it to test
***  whether a voiced (3) or half voiced
***  (5) form follows a given kana.
***  Note:
***		To save space should be nibbles
***  IMPORTANT:
***		The '1' entries that do not have
***		a matching '0' entry have been
***		changed to zero to save space in
***		the subcollation area.
***		The original table is listed below.
***		(NOTE(review): no such listing is
***		present in this file.)
***------------------------------------*/
FLMBYTE 	KanaSubColTbl[] =
{
	0,1,0,1,0,1,0,1,0,1,			/* a A i I u U e E o O */
	1,3,0,3,0,3,1,3,0,3,			/* KA GA KI GI KU GU KE GE KO GO */
	0,3,0,3,0,3,0,3,0,3,			/* SA ZA SHI JI SU ZU SE ZE SO ZO */
	0,3,0,3,0,1,3,0,3,0,3,		/* TA DA CHI JI tsu TSU ZU TE DE TO DO */
	0,0,0,0,0,						/* NA NI NU NE NO */
	0,3,5,0,3,5,0,3,5,			/* HA BA PA HI BI PI FU BU PU */
	0,3,5,0,3,5,					/* HE BE PE HO BO PO */
	0,0,0,0,0,						/* MA MI MU ME MO */
	0,1,0,1,0,1,					/* ya YA yu YU yo YO */
	0,0,0,0,0,						/* RA RI RU RE RO */
	0,1,0,0,0,						/* wa WA WI WE WO */
	0,3,0,0							/* N VU ka ke */
};

/**
***	Map katakana (CharSet x26) to collation values
***	kana collating values are two byte values
***	where the high byte is 0x01.
**/

/* Indexed by the character value in character set 0x26 (hiragana is first
 * rebased by subtracting 0x5E).  86 entries (0x00..0x55).  Plain, voiced
 * and half voiced forms of a kana share one collation value; the
 * difference is carried by KanaSubColTbl.
 */
FLMBYTE 	KanaColTbl[] =
{
	 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,		/* a A i I u U e E o O */
 	 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,		/* KA GA KI GI KU GU KE GE KO GO */
	10,10,11,11,12,12,13,13,14,14,		/* SA ZA SHI JI SU ZU SE ZE SO ZO */
	15,15,16,16,17,17,17,18,18,19,19,	/* TA DA CHI JI tsu TSU ZU TE DE TO DO */
	20,21,22,23,24,							/* NA NI NU NE NO */
	25,25,25,26,26,26,27,27,27,			/* HA BA PA HI BI PI FU BU PU */
	28,28,28,29,29,29,						/* HE BE PE HO BO PO */
	30,31,32,33,34,							/* MA MI MU ME MO */
	35,35,36,36,37,37,						/* ya YA yu YU yo YO */
	38,39,40,41,42,							/* RA RI RU RE RO */
	43,43,44,45,46,							/* wa WA WI WE WO */
	47, 2, 5, 8									/* N VU ka ke */
};

/**---------------------------------------
***  Map KataKana collated value to vowel
***  value for use for the previous char.
***
***  Indexed by the low byte of a kana
***  collation value (a KanaColTbl value).
***  The table holds 47 entries, for
***  collation values 0..46 (a .. WO).
***  There is no entry for N (47), so it
***  must not be indexed with that value.
***--------------------------------------*/
FLMBYTE 	KanaColToVowel[] =
{
	0,1,2,3,4,		/*  a   i   u   e   o */
	0,1,2,3,4,		/* ka  ki  ku  ke  ko */
	0,1,2,3,4,		/* sa shi  su  se  so */
	0,1,2,3,4,		/* ta chi tsu  te  to */
	0,1,2,3,4,		/* na  ni  nu  ne  no */
	0,1,2,3,4,		/* ha  hi  hu  he  ho */
	0,1,2,3,4,		/* ma  mi  mu  me  mo */
	0,2,4,			/* ya  yu  yo */
	0,1,2,3,4,		/* ra  ri  ru  re  ro */
	0,1,3,4,			/* wa  wi  we  wo */
};

/**
***	Convert Zenkaku (double wide) to Hankaku (single wide)
***	Character set 0x24 maps to single wide chars in other char sets.
***	This enables collation values to be found on some symbols.
***	This is also used to convert symbols from hankaku to Zen24.
***
**/

/* ByteValue is the character value within character set 0x24, WordValue is
 * the single wide WP character (character set in the high byte).  Entries
 * MUST stay sorted by ByteValue: ZenToHankaku() stops scanning at the first
 * entry whose ByteValue is >= the value searched for.  HanToZenkaku() scans
 * the same table in the other direction, matching on WordValue.
 */
BYTE_WORD_TBL Zen24ToHankaku[] =
{
	{	0  ,0x0020 },		/* space */
	{	1  ,0x0b03 },		/* japanese comma */
	{	2  ,0x0b00 },		/* circle period */
	{	3  ,  44 },			/* comma */
	{	4  ,  46 },			/* period */
	{	5  ,0x0b04 },		/* center dot */
	{	6  ,  58 },			/* colon */
	{	7  ,  59 },			/* semicolon */
	{	8  ,  63 },			/* question mark */
	{	9  ,  33 },			/* exclamation mark */
	{	10 ,0x0b3d },		/* dakuten */
	{	11 ,0x0b3e },		/* handakuten */
	{	12 ,0x0106 },		/* accent mark */
	{	13 ,  96 },			/* accent mark */
	{	14 ,0x0107 },		/* umlaut */
	{	15 ,  94 },			/* caret */
	{	16 ,0x0108 },		/* macron */
	{	17 ,  95 },			/* underscore */
	{	27 ,0x0b0f },		/* extend vowel */
	{	28 ,0x0422 },		/* mdash */
	{	29 ,  45 },			/* hyphen */
	{	30 ,  47 },			/* slash */
	{	31 ,0x0607 },		/* backslash */
	{	32 , 126 },			/* tilde */
	{	33 ,0x0611 },		/* doubleline */
	{	34 ,0x0609 },		/* line */
	{	37 ,0x041d },		/* left apostrophe */
	{	38 ,0x041c },		/* right apostrophe */
	{	39 ,0x0420 },		/* left quote */
	{	40 ,0x041f },		/* right quote */
	{	41 ,  40 },			/* left paren */
	{	42 ,  41 },			/* right paren */
	{	45 ,  91 },			/* left bracket */
	{	46 ,  93 },			/* right bracket */
	{	47 , 123 },			/* left curly bracket */
	{	48 , 125 },			/* right curly bracket */
	{	53 ,0x0b01 },		/* left j quote */
	{	54 ,0x0b02 },		/* right j quote */
	{	59 ,  43 },			/* plus */
	{	60 ,0x0600 },		/* minus */
	{	61 ,0x0601 },		/* plus/minus */
	{	62 ,0x0627 },		/* times */
	{	63 ,0x0608 },		/* divide */
	{	64 ,  61 },			/* equal */
	{	65 ,0x0663 },		/* unequal */
	{	66 ,  60 },			/* less */
	{	67 ,  62 },			/* greater */
	{	68 ,0x0602 },		/* less/equal */
	{	69 ,0x0603 },		/* greater/equal */
	{	70 ,0x0613 },		/* infinity */
	{	71 ,0x0666 },		/* triangle dots */
	{	72 ,0x0504 },		/* man */
	{	73 ,0x0505 },		/* woman */
	{	75 ,0x062d },		/* prime */
	{	76 ,0x062e },		/* double prime */
	{	78 ,0x040c },		/* yen */
	{	79 ,  36 },			/* $ */
	{	80 ,0x0413 },		/* cent */
	{	81 ,0x040b },		/* pound */
	{	82 ,  37 },			/* % */
	{	83 ,  35 },			/* # */
	{	84 ,  38 },			/* & */
	{	85 ,  42 },			/* * */
	{	86 ,  64 },			/* @ */
	{	87 ,0x0406 },		/* squiggle */
	{	89 ,0x06b8 },		/* filled star */
	{	90 ,0x0425 },		/* hollow circle */
	{	91 ,0x042c },		/* filled circle */
	{	93 ,0x065f },		/* hollow diamond */
	{	94 ,0x0660 },		/* filled diamond */
	{	95 ,0x0426 },		/* hollow box */
	{	96 ,0x042e },		/* filled box */
	{	97 ,0x0688 },		/* hollow triangle */
	{	99 ,0x0689 },		/* hollow upside down triangle */
	{	103,0x0615 },		/* right arrow */
	{	104,0x0616 },		/* left arrow */
	{	105,0x0617 },		/* up arrow */
	{	106,0x0622 },		/* down arrow */
	{	119,0x060f },
	{	121,0x0645 },
	{	122,0x0646 },
	{	123,0x0643 },
	{	124,0x0644 },
	{	125,0x0642 },		/* union */
	{	126,0x0610 },		/* intersection */
	{	135,0x0655 },
	{	136,0x0656 },
	{	138,0x0638 },		/* right arrow */
	{	139,0x063c },		/* left/right arrow */
	{	140,0x067a },
	{	141,0x0679 },
	{	153,0x064f },		/* angle */
	{	154,0x0659 },
	{	155,0x065a },
	{	156,0x062c },
	{	157,0x062b },
	{	158,0x060e },
	{	159,0x06b0 },
	{	160,0x064d },
	{	161,0x064e },
	{	162,0x050e },		/* square root */
	{	164,0x0604 },
	{	175,0x0623 },		/* angstrom */
	{	176,0x044b },		/* percent */
	{	177,0x051b },		/* sharp */
	{	178,0x051c },		/* flat */
	{	179,0x0509 },		/* musical note */
	{	180,0x0427 },		/* dagger */
	{	181,0x0428 },		/* double dagger */
	{	182,0x0405 },		/* paragraph */
	{	187,0x068f }		/* big hollow circle */
};

/**
***	Maps CS26 to CharSet 11
***	Taken from Char.asm
***	Used to uncollate characters for FLAIM - placed here for consistency
***		0x80 - add dakuten
***		0xC0 - add handakuten
***		0xFF - no mapping exists
***	ZenToHankaku() keeps only the low six bits as the character value
***	within character set 11 once the voicing flags have been tested.
**/
FLMBYTE MapCS26ToCharSet11[ 86 ] =
{
	0x06,		/* 0     a */
	0x10,		/* 1     A */
	0x07,		/* 2     i */
	0x11,		/* 3     I */
	0x08,		/* 4     u */
	0x12,		/* 5     U */
	0x09,		/* 6     e */
	0x13,		/* 7     E */
	0x0a,		/* 8     o */
	0x14,		/* 9     O */

	0x15,		/* 0x0a  KA */
	0x95,		/*       GA - 21 followed by 0x3D dakuten */
	0x16,		/* 0x0c  KI */
	0x96,		/*       GI */
	0x17,		/* 0x0e  KU */
	0x97,		/*       GU */
	0x18,		/* 0x10  KE */
	0x98,		/*       GE */
	0x19,		/* 0x12  KO */
	0x99,		/*       GO */

	0x1a,		/* 0x14  SA */
	0x9a,		/*       ZA */
	0x1b,		/* 0x16  SHI */
	0x9b,		/*       JI */
	0x1c,		/* 0x18  SU */
	0x9c,		/*       ZU */
	0x1d,		/* 0x1a  SE */
	0x9d,		/*       ZE */
	0x1e,		/* 0x1c  SO */
	0x9e,		/*       ZO */

	0x1f,		/* 0x1e  TA */
	0x9f,		/*       DA */
	0x20,		/* 0x20  CHI */
	0xa0,		/*       JI */
	0x0e,		/* 0x22  small tsu */
	0x21,		/* 0x23  TSU */
	0xa1,		/*       ZU */
	0x22,		/* 0x25  TE */
	0xa2,		/*       DE */
	0x23,		/* 0x27  TO */
	0xa3,		/*       DO */

	0x24,		/* 0x29  NA */
	0x25,		/* 0x2a  NI */
	0x26,		/* 0x2b  NU */
	0x27,		/* 0x2c  NE */
	0x28,		/* 0x2d  NO */

	0x29,		/* 0x2e  HA */
	0xa9,		/* 0x2f  BA */
	0xe9,		/* 0x30  PA */
	0x2a,		/* 0x31  HI */
	0xaa,		/* 0x32  BI */
	0xea,		/* 0x33  PI */
	0x2b,		/* 0x34  FU */
	0xab,		/* 0x35  BU */
	0xeb,		/* 0x36  PU */
	0x2c,		/* 0x37  HE */
	0xac,		/* 0x38  BE */
	0xec,		/* 0x39  PE */
	0x2d,		/* 0x3a  HO */
	0xad,		/* 0x3b  BO */
	0xed,		/* 0x3c  PO */

	0x2e,		/* 0x3d  MA */
	0x2f,		/* 0x3e  MI */
	0x30,		/* 0x3f  MU */
	0x31,		/* 0x40  ME */
	0x32,		/* 0x41  MO */

	0x0b,		/* 0x42  small ya */
	0x33,		/* 0x43  YA */
	0x0c,		/* 0x44  small yu */
	0x34,		/* 0x45  YU */
	0x0d,		/* 0x46  small yo */
	0x35,		/* 0x47  YO */

	0x36,		/* 0x48  RA */
	0x37,		/* 0x49  RI */
	0x38,		/* 0x4a  RU */
	0x39,		/* 0x4b  RE */
	0x3a,		/* 0x4c  RO */

	0xff,		/* 0x4d  small wa */
	0x3b,		/* 0x4e  WA */
	0xff,		/* 0x4f  WI */
	0xff,		/* 0x50  WE */
	0x05,		/* 0x51  WO */

	0x3c,		/* 0x52  N */
	0xff,		/* 0x53  VU */
	0xff, 	/* 0x54  ka */
	0xff 		/* 0x55  ke */
};

/**
***	Conversion from single (Hankaku) to double (Zenkaku) wide characters
***	Used in HanToZenkaku()
***	Each From0xToZen table covers one run of ASCII punctuation and holds
***	the character value within character set 0x24.
**/

/* maps from charset 0 0x20..0x2F to CS24 (punctuation) */
FLMBYTE From0AToZen[] =
{
	/* ' changed because of windows */
 	0, 	9, 	40, 	0x53, 		/* sp ! " # */
 	0x4f, 0x52, 0x54,	38, 			/* $ % & ' */	/* Was 187 for ! and 186 for ' */
	0x29,	0x2a,	0x55,	0x3b, 		/* ( ) * + */
	3, 	0x1d,	4, 	0x1e			/* , - . / */
};

/* charset 0 0x3A..0x40 */
FLMBYTE From0BToZen[] =
{
	6, 	7, 	0x42,	0x40,			/* : ; < = */
	0x43,	8, 	0x56					/* > ? @ */
};

/* charset 0 0x5B..0x60 */
FLMBYTE From0CToZen[] =
{
	0x2d,	0x1f,	0x2e,	0x0f,	0x11,	0x0d	/* [ \ ] ^ _ ` */
};

/* charset 0 0x7B..0x7E */
FLMBYTE From0DToZen[] =
{
	0x2f,	0x22,	0x30,	0x20 			/* { | } ~ */
};

/* Indexed by the character value in character set 8 (Greek); yields the
 * character value within character set 0x26, or 0xFF when no double wide
 * form exists.
 */
FLMBYTE  From8ToZen[] =
{
	/* Fast way to convert from 8 to zen */
	0x5e, 0x7e, 0x5f, 0x7f, 0x5f, 0xFF, 0x60, 0x80,
	0x61, 0x81, 0x62, 0x82, 0x63, 0x83, 0x64, 0x84,
	0x65, 0x85, 0x66, 0x86, 0x67, 0x87, 0x68, 0x88,
	0x69, 0x89, 0x6a, 0x8a, 0x6b, 0x8b, 0x6c, 0x8c,
	0x6d, 0x8d, 0x6e, 0x8e, 0x6f, 0x8f, 0x6f, 0xFF,
	0x70, 0x90, 0x71, 0x91, 0x72, 0x92, 0x73, 0x93,
	0x74, 0x94, 0x75, 0x95
};

/* Character set 11 values 0..4 -> character value within character set 0x24 */
static FLMBYTE From11AToZen[] =
{
	/* 11 to 24 punctuation except dash */
	2,							/* japanese period */
	0x35,						/* left bracket */
	0x36,						/* right bracket */
	0x01,						/* comma */
	0x05						/* chuuten */
};

/* Character set 11 values 5..0x3C -> character value within character set
 * 0x26.  0xFF marks the dash, which HanToZenkaku() maps to 0x241b instead.
 */
static FLMBYTE From11BToZen[] =
{
	/* 11 to 26 (katakana) from 11,5 */
	0x51,											/* wo */
	0,2,4,6,8,0x42,0x44,0x46,0x22,		/* small a i u e o ya yu yo tsu */
	0xFF, 1, 3, 5, 7, 9,						/* dash (x241b) a i u e o */
	0x0a, 0x0c, 0x0e, 0x10, 0x12,			/* ka ki ku ke ko */
	0x14, 0x16, 0x18, 0x1a, 0x1c,			/* sa shi su se so */
	0x1e, 0x20, 0x23, 0x25, 0x27,			/* ta chi tsu te to */
	0x29, 0x2a, 0x2b, 0x2c, 0x2d,			/* na ni nu ne no */
	0x2e, 0x31, 0x34, 0x37, 0x3a,			/* ha hi fu he ho */
	0x3d, 0x3e, 0x3f, 0x40, 0x41,			/* ma mi mu me mo */
	0x43, 0x45, 0x47,							/* ya yu yo */
	0x48, 0x49, 0x4a, 0x4b, 0x4c,			/* ra ri ru re ro */
	0x4e, 0x52									/* WA N */
};	/* does not have wa WI WE VU ka ke */

/****************************************************************************
Desc:	Returns the collation value of the input Wp character.
		If in charset 11 will convert the character to Zenkaku (double wide).
In: ui16WpChar - Char to collate off of - could be in CS0..14 or x24..up ui16NextWpChar - next WP char for CS11 voicing marks ui16PrevColValue - previous collating value - for repeat/vowel repeat pui16ColValue - returns 2 byte collation value pui16SubColVal - 0, 6 or 16 bit value for the latin sub collation or the kana size & vowel voicing 001 - set if large (upper) character 010 - set if voiced 100 - set if half voiced pucCaseBits - returns 2 bits Latin/Greek/Cyrillic 01 - case bit set if character is uppercase 10 - double wide character in CS 0x25xx, 0x26xx and 0x27xx Japanese 00 - double wide hiragana 0x255e..25b0 01 - double wide katakana 0x2600..2655 10 - double wide symbols that map to charset 11 11 - single wide katakana from charset 11 Ret: 0 - no valid collation value high values set for pui16ColValue Sub-collation gets original WP character value 1 - valid collation value 2 - valid collation value and used the ui16NextWpChar Notes: Code taken from XCH2COL.ASM - routine xch2col_f also from CMPWS.ASM - routine getcase Terms: HANKAKU - single wide characters in charsets 0..14 ZENKAKU - double wide characters in charsets 0x24..end of kanji KANJI - collation values are 0x2900 less than WPChar value ****************************************************************************/ FLMUINT16 fwpAsiaGetCollation( FLMUINT16 ui16WpChar, // WP char to get collation values FLMUINT16 ui16NextWpChar, // Next WP char - for CS11 voicing marks FLMUINT16 ui16PrevColValue, // Previous collating value FLMUINT16 * pui16ColValue, // Returns collation value FLMUINT16 * pui16SubColVal, // Returns sub-collation value FLMBYTE * pucCaseBits, // Returns case bits value FLMUINT16 uiUppercaseFlag // Set if to convert to uppercase ) { FLMUINT16 ui16ColValue; FLMUINT16 ui16SubColVal; FLMBYTE ucCaseBits = 0; FLMBYTE ucCharSet = ui16WpChar >> 8; FLMBYTE ucCharVal = ui16WpChar & 0xFF; FLMUINT16 ui16Hankaku; FLMUINT uiLoop; FLMUINT16 ui16ReturnValue = 1; ui16ColValue = ui16SubColVal = 0; // 
Kanji or above if (ucCharSet >= 0x2B) { // Puts 2 or above into high byte. ui16ColValue = ui16WpChar - 0x2900; // No subcollation or case bits need to be set goto Exit; } // Single wide character? (HANKAKU) if (ucCharSet < 11) { // Get the values from a non-asian character // LATIN, GREEK or CYRILLIC // The width bit may have been set on a jump to // label from below. Latin_Greek_Cyrillic: // YES: Pass US_LANG because this is what we want - // Prevents double character sorting. ui16ColValue = fwpGetCollation( ui16WpChar, US_LANG); if (uiUppercaseFlag || fwpIsUpper( ui16WpChar)) { // Uppercase - set case bit ucCaseBits |= SET_CASE_BIT; } // Character for which there is no collation value? if (ui16ColValue == COLS0) { ui16ReturnValue = 0; if (!fwpIsUpper( ui16WpChar)) { // Convert to uppercase ui16WpChar--; } ui16ColValue = 0xFFFF; ui16SubColVal = ui16WpChar; } else if (ucCharSet) // Don't bother with ascii { if (!fwpIsUpper( ui16WpChar)) { // Convert to uppercase ui16WpChar--; } if (ucCharSet == CHSMUL1) { FLMUINT16 ui16Base; FLMUINT16 ui16Diacritic; ui16SubColVal = !fwpCh6Brkcar( ui16WpChar, &ui16Base, &ui16Diacritic) ? fwp_dia60Tbl[ ui16Diacritic & 0xFF] : ui16WpChar; } else if (ucCharSet == CHSGREK) // GREEK { if (ui16WpChar >= 0x834 || // [8,52] or above ui16WpChar == 0x804 || // [8,4] BETA Medial | Terminal ui16WpChar == 0x826) // [8,38] SIGMA terminal ui16SubColVal = ui16WpChar; } else if (ucCharSet == CHSCYR) // CYRILLIC { if (ui16WpChar >= 0xA90) // [10, 144] or above { ui16SubColVal = ui16WpChar; // Dup collation values } } // else don't need a sub collation value } goto Exit; } // Single wide Japanese character? if (ucCharSet == 11) { FLMUINT16 ui16KanaChar; // Convert charset 11 to Zenkaku (double wide) CS24 or CS26 hex. // All characters in charset 11 will convert to CS24 or CS26. // when combining the collation and the sub-collation values. 
if (HanToZenkaku( ui16WpChar, ui16NextWpChar, &ui16KanaChar ) == 2) { // Return 2 ui16ReturnValue++; } ucCaseBits |= SET_WIDTH_BIT; // Set so will allow to go back ui16WpChar = ui16KanaChar; // If in CS24 will fall through to ZenKaku ucCharSet = ui16KanaChar >> 8; ucCharVal = ui16KanaChar & 0xFF; } if (ui16WpChar < 0x2400) { // In some other character set goto Latin_Greek_Cyrillic; } else if (ui16WpChar >= 0x255e && // Hiragana? ui16WpChar <= 0x2655) // Katakana? { if (ui16WpChar >= 0x2600) { ucCaseBits |= SET_KATAKANA_BIT; } // HIRAGANA & KATAKANA // Kana contains both hiragana and katakana. // The tables contain the same characters in same order if (ucCharSet == 0x25) { // Change value to be in character set 26 ucCharVal -= 0x5E; } ui16ColValue = 0x0100 + KanaColTbl[ ucCharVal ]; ui16SubColVal = KanaSubColTbl[ ucCharVal ]; goto Exit; } // ZenKaku - means any double wide character // Hankaku - single wide character // Inputs: 0x2400..2559 symbols..latin - Zenkaku // 0x265B..2750 greek..cyrillic - Zenkaku // SET_WIDTH_BIT may have been set if original char // was in 11 and got converted to CS24. [1,2,5,27(extendedVowel),53,54] // Original chars from CS11 will have some collation value that when // combined with the sub-collation value will format a character in // CS24. The width bit will then convert back to CS11. if ((ui16Hankaku = ZenToHankaku( ui16WpChar, (FLMUINT16 *) 0 )) != 0) { if ((ui16Hankaku >> 8) != 11) // if CharSet11 was a CS24 symbol { ui16WpChar = ui16Hankaku; // May be CS24 symbol/latin/gk/cy ucCharSet = ui16WpChar >> 8; ucCharVal = ui16WpChar & 0xFF; ucCaseBits |= SET_WIDTH_BIT; // Latin symbols double wide goto Latin_Greek_Cyrillic; } } // 0x2400..0x24bc Japanese symbols that cannot be converted to Hankaku. // All 6 original symbol chars from 11 will also be here. // First try to find a collation value of the symbol. // The sub-collation value will be the position in the CS24 table + 1. 
for (uiLoop = 0; uiLoop < (sizeof(fwp_Ch24ColTbl) / sizeof(BYTE_WORD_TBL)); uiLoop++ ) { if (ucCharVal == fwp_Ch24ColTbl[ uiLoop].ByteValue) { if ((ui16ColValue = fwp_Ch24ColTbl[ uiLoop].WordValue) < 0x100) { // Don't save for chuuten, dakuten, handakuten ui16SubColVal = (FLMUINT16)(uiLoop + 1); } break; } } if (!ui16ColValue) { // Now see if it's a repeat or repeat-vowel character if( (((ucCharVal >= 0x12) && (ucCharVal <= 0x15)) || (ucCharVal == 0x17) || (ucCharVal == 0x18)) && ((ui16PrevColValue >> 8) == 1)) { ui16ColValue = ui16PrevColValue; // Store original WP character ui16SubColVal = ui16WpChar; } else if( (ucCharVal == 0x1B) && // repeat vowel? (ui16PrevColValue >= 0x100) && (ui16PrevColValue < COLS_ASIAN_MARKS)) // Previous kana char? { ui16ColValue = 0x0100 + KanaColToVowel[ ui16PrevColValue & 0xFF ]; // Store original WP character ui16SubColVal = ui16WpChar; } else { ui16ReturnValue = 0; ui16ColValue = 0xFFFF; // No collation value ui16SubColVal = ui16WpChar; // Never have changed if gets here } } Exit: // Set return values *pui16ColValue = ui16ColValue; *pui16SubColVal = ui16SubColVal; *pucCaseBits = ucCaseBits; return( ui16ReturnValue); } /**************************************************************************** Desc: Convert a zenkaku (double wide) char to a hankaku (single wide) char Ret: Hankaku char or 0 if a conversion doesn't exist Notes: Taken from CHAR.ASM - zen2han_f routine ****************************************************************************/ FLMUINT16 ZenToHankaku( FLMUINT16 ui16WpChar, FLMUINT16 * DakutenOrHandakutenRV ) { FLMUINT16 ui16Hankaku = 0; FLMBYTE ucCharSet = ui16WpChar >> 8; FLMBYTE ucCharVal = ui16WpChar & 0xFF; FLMUINT uiLoop; switch (ucCharSet) { // SYMBOLS case 0x24: for (uiLoop = 0; uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL)); uiLoop++) { // List is sorted so table entry is more you are done if (Zen24ToHankaku [uiLoop].ByteValue >= ucCharVal) { if (Zen24ToHankaku [uiLoop].ByteValue == ucCharVal) { 
ui16Hankaku = Zen24ToHankaku [uiLoop].WordValue; } break; } } break; // ROMAN - 0x250F..2559 // Hiragana - 0x255E..2580 case 0x25: if (ucCharVal >= 0x0F && ucCharVal < 0x5E) { ui16Hankaku = ucCharVal + 0x21; } break; // Katakana - 0x2600..2655 // Greek - 0x265B..2695 case 0x26: if (ucCharVal <= 0x55) // Katakana range { FLMBYTE ucCS11CharVal; FLMUINT16 ui16NextWpChar = 0; if ((ucCS11CharVal = MapCS26ToCharSet11[ ucCharVal ]) != 0xFF) { if (ucCS11CharVal & 0x80) { if( ucCS11CharVal & 0x40) { // Handakuten voicing ui16NextWpChar = 0xB3E; } else { // Dakuten voicing ui16NextWpChar = 0xB3D; } ucCS11CharVal &= 0x3F; } ui16Hankaku = 0x0b00 + ucCS11CharVal; if( ui16NextWpChar && DakutenOrHandakutenRV ) { *DakutenOrHandakutenRV = ui16NextWpChar; } } } else if (ucCharVal <= 0x95) // Greek { FLMBYTE ucGreekChar = ucCharVal; // Make a zero based number. ucGreekChar -= 0x5E; // Check for lowercase if( ucGreekChar >= 0x20) { // Convert to upper case for now ucGreekChar -= 0x20; } if (ucGreekChar >= 2) { ucGreekChar++; } if (ucGreekChar >= 19) { ucGreekChar++; } // Convert to character set 8 ui16Hankaku = (ucGreekChar << 1) + 0x800; if (ucCharVal >= (0x5E + 0x20)) { // Adjust to lower case character ui16Hankaku++; } } break; // Cyrillic case 0x27: // Uppercase? if (ucCharVal <= 0x20) { ui16Hankaku = (ucCharVal << 1) + 0xa00; } else if (ucCharVal >= 0x30 && ucCharVal <= 0x50) { // Lower case ui16Hankaku = ((ucCharVal - 0x30) << 1) + 0xa01; } break; } return( ui16Hankaku); } /**************************************************************************** Desc: Convert a WPChar from hankaku (single wide) to zenkaku (double wide). 1) Used to see if a char in CS11 can map to a double wide character 2) Used to convert keys into original data. Ret: 0 = no conversion 1 = converted character to zenkaku 2 = ui16NextWpChar dakuten or handakuten voicing got combined Notes: Taken from char.asm - han2zen() From8ToZen could be taken out and placed in code. 
****************************************************************************/

// Parameters:
//		ui16WpChar     - single wide WP character to convert
//		ui16NextWpChar - character that follows it; only examined when
//							  ui16WpChar is a character set 11 katakana, to fold
//							  a dakuten (0xB3D) or handakuten (0xB3E) into it
//		pui16Zenkaku   - always written: the double wide character, or 0
//							  when no conversion exists
//
// Cyrillic (character set 10) is converted only for values 0x00..0x41.
// These are the values that ZenToHankaku() can produce from the zenkaku
// ranges 0x2700..0x2720 (upper case) and 0x2730..0x2750 (lower case).
// Larger values report "no conversion"; previously they were mapped onto
// unrelated or already used zenkaku codes (e.g. 0x0A60 -> 0x2730).

FLMUINT16 HanToZenkaku(
	FLMUINT16	ui16WpChar,
	FLMUINT16	ui16NextWpChar,
	FLMUINT16 *	pui16Zenkaku)
{
	FLMUINT16	ui16Zenkaku = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMUINT		uiLoop;
	FLMUINT16	ui16CharsUsed = 1;

	switch( ucCharSet)
	{
		// Character set 0 - symbols

		case 0:

			// Invalid? - all others are used.

			if (ucCharVal < 0x20)
			{
				;
			}
			else if (ucCharVal <= 0x2F)
			{
				// Symbols A

				ui16Zenkaku = 0x2400 + From0AToZen[ ucCharVal - 0x20 ];
			}
			else if (ucCharVal <= 0x39)
			{
				// 0..9

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x40)
			{
				// Symbols B

				ui16Zenkaku = 0x2400 + From0BToZen[ ucCharVal - 0x3A ];
			}
			else if (ucCharVal <= 0x5A)
			{
				// A..Z

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x60)
			{
				// Symbols C

				ui16Zenkaku = 0x2400 + From0CToZen[ ucCharVal - 0x5B ];
			}
			else if (ucCharVal <= 0x7A)
			{
				// a..z

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x7E)
			{
				// Symbols D

				ui16Zenkaku = 0x2400 + From0DToZen[ ucCharVal - 0x7B ];
			}
			break;

		// GREEK

		case 8:

			// Outside the table, or a table entry of 0xFF: no conversion

			if ((ucCharVal >= sizeof( From8ToZen)) ||
				 ((ui16Zenkaku = 0x2600 + From8ToZen[ ucCharVal ]) == 0x26FF))
			{
				ui16Zenkaku = 0;
			}
			break;

		// CYRILLIC

		case 10:

			// Check range - even values are upper case, odd values the
			// matching lower case; only 0x00..0x41 have a zenkaku form.

			if (ucCharVal <= 0x41)
			{
				ui16Zenkaku = 0x2700 + (ucCharVal >> 1);	// Uppercase value

				// Convert to lower case?

				if( ucCharVal & 0x01)
				{
					ui16Zenkaku += 0x30;
				}
			}
			break;

		// JAPANESE

		case 11:
			if (ucCharVal < 5)
			{
				ui16Zenkaku = 0x2400 + From11AToZen[ ucCharVal ];
			}
			else if (ucCharVal < 0x3D)		// katakana?
			{
				if ((ui16Zenkaku = 0x2600 +
							From11BToZen[ ucCharVal - 5 ]) == 0x26FF)
				{
					// Dash - convert to this

					ui16Zenkaku = 0x241b;
				}
				else
				{
					// The largest value From11BToZen[] yields here is 0x52
					// (N), so the +1 / +2 look-ups below stay inside the
					// 86 entry KanaSubColTbl[].

					if (ui16NextWpChar == 0xB3D)			// dakuten? - voicing
					{
						// First check exception(s) then
						// check if voicing exists! - will NOT access out of table
						// ('N' is excluded because the entry after it, VU,
						// is marked voiced but is not a voiced N.)

						if ((ui16Zenkaku != 0x2652) &&	// is not 'N'?
							 (KanaSubColTbl[ ui16Zenkaku - 0x2600 + 1 ] == 3))
						{
							ui16Zenkaku++;

							// Return 2

							ui16CharsUsed++;
						}
					}
					else if (ui16NextWpChar == 0xB3E)	// handakuten? - voicing
					{
						// Check if voicing exists! - will NOT access out of table

						if (KanaSubColTbl [ui16Zenkaku - 0x2600 + 2 ] == 5)
						{
							ui16Zenkaku += 2;

							// Return 2

							ui16CharsUsed++;
						}
					}
				}
			}
			else if (ucCharVal == 0x3D)		// dakuten?
			{
				// Convert to voicing symbol

				ui16Zenkaku = 0x240A;
			}
			else if (ucCharVal == 0x3E)		// handakuten?
			{
				// Convert to voicing symbol

				ui16Zenkaku = 0x240B;
			}
			// else cannot convert
			break;

		// Other character sets
		// CS 1,4,5,6 - symbols

		default:

			// Instead of includes more tables from char.asm - look down the
			// Zen24Tohankaku[] table for a matching value - not much slower.

			for (uiLoop = 0;
				  uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL));
				  uiLoop++)
			{
				if (Zen24ToHankaku[ uiLoop].WordValue == ui16WpChar)
				{
					ui16Zenkaku = 0x2400 + Zen24ToHankaku[ uiLoop].ByteValue;
					break;
				}
			}
			break;
	}

	if (!ui16Zenkaku)
	{
		// Change return value

		ui16CharsUsed = 0;
	}

	*pui16Zenkaku = ui16Zenkaku;
	return( ui16CharsUsed);
}